Skip to content

Commit

Permalink
Add capabilities for tif downloads
Browse files Browse the repository at this point in the history
  • Loading branch information
timbeccue committed Feb 15, 2022
1 parent fe7ff96 commit 582c8b8
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 18 deletions.
6 changes: 3 additions & 3 deletions api/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ def filtered_images_query_handler(event, context):
return http_response(HTTPStatus.NOT_FOUND, error_msg)
except Exception as e:
logger.exception("Error in filter images query. ")
return http_response(HTTPStatus.NOT_FOUND, error_msg)
return http_response(HTTPStatus.NOT_FOUND, e)

return http_response(HTTPStatus.OK, images)

Expand All @@ -455,6 +455,6 @@ def remove_image_by_filename_handler(event, context):
except Exception as e:
error_msg = f"Could not delete {base_filename}. Error: {e}"
logger.exception(error_msg)
return http_response(HTTPStatus.NOT_FOUND, error_msg)
return http_response(HTTPStatus.NOT_FOUND, e)

return http_response(HTTPStatus.OK, image)
return http_response(HTTPStatus.OK, f'Successfully removed {base_filename}')
66 changes: 54 additions & 12 deletions api/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from api.helpers import dynamodb_r
from api.helpers import DecimalEncoder, http_response, _get_body, _get_secret, get_db_connection
from api.helpers import get_base_filename_from_full_filename
from api.helpers import get_s3_file_url
from api.s3_helpers import save_tiff_to_s3

from api.db import get_files_within_date_range

Expand Down Expand Up @@ -145,7 +147,21 @@ def upload(event, context):


def download(event, context):
log.info(json.dumps(event, indent=2))
""" This method is used to handle requests to download individual data files.
Request body args:
s3_directory (str): data | info-images | allsky | test, specifies the s3 object prefix (ie folder)
where the data is stored. Default is 'data'.
object_name (str): the full filename of the requested file. Appending this to the end of s3_directory
should specify the full key for the object in s3.
        image_type (str): tif | fits, used if the requester wants a tif file created from the underlying fits
            image. If so, the tif file is created on the fly. Default is 'fits'.
stretch (str): linear | arcsinh, used to specify the stretch parameters if a tif file is requested.
Default is 'arcsinh'.
Return: (str) presigned s3 download url that the requester can use to access the file.
"""
log.info(event)
body = _get_body(event)

# retrieve and validate the s3_directory
Expand All @@ -155,28 +171,48 @@ def download(event, context):
log.warning(error_msg)
return http_response(HTTPStatus.FORBIDDEN, error_msg)

key = f"{s3_directory}/{body['object_name']}"
key = f"{s3_directory}/{body['object_name']}" # full path to object in s3 bucket
params = {
"Bucket": BUCKET_NAME,
"Key": key,
}
url = s3.generate_presigned_url(
ClientMethod='get_object',
Params=params,
ExpiresIn=S3_GET_TTL
)
log.info(f"Presigned download url: {url}")
return http_response(HTTPStatus.OK, str(url))

    image_type = body.get('image_type', 'fits')  # assume 'fits' if not otherwise specified

# Routine if TIFF file is specified
if image_type in ['tif', 'tiff']:
stretch = body.get('stretch', 'arcsinh')
#s3_destination_key = f"downloads/tif/{body['object_name']}"
s3_destination_key = save_tiff_to_s3(BUCKET_NAME, key, stretch)
url = get_s3_file_url(s3_destination_key)
log.info(f"Presigned download url: {url}")
return http_response(HTTPStatus.OK, str(url))

# if TIFF file not requested, just get the file as-is from s3
else:
url = s3.generate_presigned_url(
ClientMethod='get_object',
Params=params,
ExpiresIn=S3_GET_TTL
)
log.info(f"Presigned download url: {url}")
return http_response(HTTPStatus.OK, str(url))


def download_zip(event, context):
""" This method returns a link to download a zip of multiple images in fits format.
First, get a list of files to be zipped based on the query parameters specified.
Next, call a lambda function (defined in the repository zip-downloads) that creates a zip
from the list of specified files and uploads that back to s3, returning a presigned download url.
Finally, this function returns the url in the http response to the requester.
"""

pprint(event)
body = _get_body(event)
pprint(body)

start_timestamp_s = int(body.get('start_timestamp_s'))
end_timestamp_s = int(body.get('end_timestamp_s'))
fits_size = body.get('fits_size')
fits_size = body.get('fits_size') # small | large | best
site = body.get('site')

files = get_files_within_date_range(site, start_timestamp_s, end_timestamp_s, fits_size)
Expand Down Expand Up @@ -210,8 +246,14 @@ def download_zip(event, context):
pprint(logs)
return http_response(HTTPStatus.OK, zip_url)


def get_recent_uploads(event, context):

""" Query for a list of files recently uploaded to s3.
    The logs routine is found in the ptrdata repository, in which a lambda function is triggered for new objects
in the s3 bucket with prefix 'data/' (where all the regular site data is sent).
This is mainly used for easier debugging, and is displayed in the PTR web UI.
"""
print("Query string params: ", event['queryStringParameters'])
try:
site = event['queryStringParameters']['site']
Expand Down
2 changes: 1 addition & 1 deletion api/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,4 @@ def get_s3_file_url(path, ttl=604800):
Params={"Bucket": BUCKET_NAME, "Key": path},
ExpiresIn=ttl
)
return url
return url
74 changes: 74 additions & 0 deletions api/s3_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import boto3
import uuid
from astropy.io import fits
import numpy
import tifffile
import logging
import bz2
import os

s3_client = boto3.client('s3')
log = logging.getLogger()
log.setLevel(logging.INFO)

def save_tiff_to_s3(bucket, s3_source_key, stretch):
    """ Create a stretched TIFF from a FITS file in s3 and upload the bz2-compressed result.

    Args:
        bucket (str): name of the s3 bucket holding the source FITS file.
        s3_source_key (str): full key of the source FITS file within the bucket.
        stretch (str): stretch applied when rendering the TIFF (e.g. 'arcsinh').

    Return: (str) s3 key of the uploaded .tif.bz2 object, under the downloads/tif/ prefix.
    """
    # Flatten the key into a unique /tmp filename (the only writable scratch space in Lambda).
    tmpkey = s3_source_key.replace('/', '')
    local_source_file_path = f"/tmp/{uuid.uuid4()}{tmpkey}"
    local_tiff_file_path = f"/tmp/tiff-{tmpkey}.tif"
    local_tiff_file_path_bz2 = f"/tmp/tiff-{tmpkey}.tif.bz2"

    try:
        s3_client.download_file(bucket, s3_source_key, local_source_file_path)
        image_metadata = create_tiff(local_source_file_path, local_tiff_file_path, stretch)

        # Name the s3 object (which is also the name of the downloaded file) after the
        # source file, prefixed with the filter and the stretch used to render it.
        source_filename = s3_source_key.split('/')[-1]
        tif_filename = f"{source_filename.split('.')[0]}.tif.bz2"
        tif_filename = f"{image_metadata['FILTER']}-{stretch}-{tif_filename}"
        s3_destination_key = f"downloads/tif/{tif_filename}"

        s3_client.upload_file(local_tiff_file_path_bz2, bucket, s3_destination_key)
    finally:
        # Clean up /tmp: Lambda containers are reused, so leftover scratch files
        # accumulate across invocations and can exhaust the limited /tmp space.
        for path in (local_source_file_path, local_tiff_file_path, local_tiff_file_path_bz2):
            try:
                os.remove(path)
            except OSError:
                pass
    return s3_destination_key


def create_tiff(local_source_file_path, local_tiff_file_path, stretch):
    """ Render a FITS image as a 16-bit TIFF and bz2-compress it in place.

    Args:
        local_source_file_path (str): path to the local FITS file to read.
        local_tiff_file_path (str): path where the TIFF should be written; the
            compressed copy is written alongside it with a '.bz2' suffix.
        stretch (str): 'arcsinh' applies an arcsinh stretch rescaled to the full
            uint16 range; any other value writes the data as-is (linear).

    Return: (dict) metadata embedded in the TIFF, including the 'FILTER' card
        from the FITS primary header.
    """
    with fits.open(local_source_file_path) as hdulist:
        prihdr = hdulist[0].header

        metadata = {
            'PTRName': 'thename',
            'FILTER': prihdr['FILTER']
        }

        # NOTE(review): assumes linear data already fits in uint16 — confirm upstream.
        ts = numpy.asarray(hdulist[0].data)

        # Arcsinh is an artificial stretch, so massage the result back into uint16 range.
        if stretch == "arcsinh":
            ts = numpy.arcsinh(ts)
            ts = ts - numpy.min(ts)
            peak = numpy.max(ts)
            # Guard: a constant image has peak == 0 after min-subtraction,
            # which would otherwise divide by zero here.
            if peak > 0:
                # rescale it to take all of the integer range
                ts = ts * (65535.0 / peak)
        ts = ts.astype(numpy.uint16)

        # Write while the FITS file is still open, in case the data is memory-mapped.
        tifffile.imwrite(local_tiff_file_path, ts, metadata=metadata)
    to_bz2(local_tiff_file_path)
    return metadata


def to_bz2(filename, delete=False):
    """ Compress a file with bz2, writing the result to '<filename>.bz2'.

    Args:
        filename (str): path of the file to compress.
        delete (bool): if True, remove the source file after the compressed
            copy has been written successfully.

    Return: (bool) True on success, False if reading, compressing, or writing failed.
    """
    try:
        with open(filename, 'rb') as uncomp:
            comp = bz2.compress(uncomp.read())
        with open(filename + '.bz2', 'wb') as target:
            target.write(comp)
        # Only remove the source once the compressed copy is safely on disk;
        # deleting first would lose the data if the write failed.
        if delete:
            try:
                os.remove(filename)
            except OSError:
                pass
        return True
    except Exception:
        log.exception('to_bz2 failed.')
        return False
7 changes: 7 additions & 0 deletions api/tests/request_events/download_tif.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"resource": "/",
"path": "/",
"httpMethod": "POST",
"key": "val",
"body": "{\"s3_directory\": \"data\", \"object_name\": \"sro-kb001ms-20220208-00000679-EX10.fits.bz2\", \"stretch\": \"arcsinh\", \"image_type\": \"tif\"}"
}
2 changes: 1 addition & 1 deletion api/tests/unit/test_info_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def create_info_image_entry(site, channel, base_filename, data_type, header=None
optional_attributes = {}
if include_fits_01:
optional_attributes['fits_01_exists'] = True
optional_attributes['fits_01_file_path'] = f"info-images/{base_filename}-{data_type}01.fits.bz2"
optional_attributes['fits_01_file_path'] = f"info-images/{base_filename}-{data_type}00.fits.bz2"
if include_fits_10:
optional_attributes['fits_10_exists'] = True
optional_attributes['fits_10_file_path'] = f"info-images/{base_filename}-{data_type}10.fits.bz2"
Expand Down
7 changes: 6 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
astropy==4.3.1
attrs==19.3.0
boto==2.49.0
boto3==1.9.134
botocore==1.12.188
certifi==2019.6.16
Expand All @@ -25,6 +27,7 @@ pluggy==0.13.1
psycopg2-binary==2.8.4
py==1.9.0
pycparser==2.20
pyerfa==2.0.0.1
PyJWT==1.6.4
pyparsing==2.4.7
pytest==6.0.0
Expand All @@ -37,12 +40,14 @@ rope==0.17.0
Rx==3.1.0
s3transfer==0.2.1
sgp4==2.12
Shapely==1.7.1
Shapely==1.7.0
six==1.12.0
skyfield==1.24
SQLAlchemy==1.3.19
tifffile==2021.11.2
toml==0.10.1
tqdm==4.32.2
typing-extensions==3.7.4.3
tzwhere==3.0.3
urllib3==1.25.3
zipp==3.1.0
2 changes: 2 additions & 0 deletions serverless.yml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ functions:
method: post
download:
handler: api/handler.download
memorySize: 2024
timeout: 60
events:
- http:
path: /download
Expand Down

0 comments on commit 582c8b8

Please sign in to comment.