Skip to content

Commit

Permalink
Merge pull request #24 from LCOGT/update/util-organization-comments
Browse files Browse the repository at this point in the history
split util into s3_utils and file_utils, updated imports, comment changes
  • Loading branch information
LTDakin authored Aug 21, 2024
2 parents 4737230 + c1a6866 commit 5eb56fc
Show file tree
Hide file tree
Showing 8 changed files with 126 additions and 117 deletions.
3 changes: 2 additions & 1 deletion datalab/datalab_session/analysis/get_tif.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datalab.datalab_session.util import create_tif, get_fits, add_file_to_bucket, key_exists, get_s3_url
from datalab.datalab_session.file_utils import create_tif, get_fits
from datalab.datalab_session.s3_utils import key_exists, add_file_to_bucket, get_s3_url

def get_tif(input: dict):
"""
Expand Down
3 changes: 1 addition & 2 deletions datalab/datalab_session/analysis/line_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
from astropy.wcs import WCS
from astropy.wcs import WcsError

from datalab.datalab_session.util import scale_points
from datalab.datalab_session.util import get_hdu
from datalab.datalab_session.file_utils import scale_points, get_hdu

# For creating an array of brightness along a user drawn line
def line_profile(input: dict):
Expand Down
3 changes: 1 addition & 2 deletions datalab/datalab_session/analysis/source_catalog.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np

from datalab.datalab_session.util import get_hdu, scale_points

from datalab.datalab_session.file_utils import get_hdu, scale_points

def source_catalog(input: dict):
"""
Expand Down
2 changes: 1 addition & 1 deletion datalab/datalab_session/data_operations/data_operation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np

from datalab.datalab_session.tasks import execute_data_operation
from datalab.datalab_session.util import get_hdu
from datalab.datalab_session.file_utils import get_hdu

CACHE_DURATION = 60 * 60 * 24 * 30 # cache for 30 days

Expand Down
3 changes: 2 additions & 1 deletion datalab/datalab_session/data_operations/median.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import numpy as np

from datalab.datalab_session.data_operations.data_operation import BaseDataOperation
from datalab.datalab_session.util import create_fits, stack_arrays, create_jpgs, save_fits_and_thumbnails
from datalab.datalab_session.file_utils import create_fits, stack_arrays, create_jpgs
from datalab.datalab_session.s3_utils import save_fits_and_thumbnails

log = logging.getLogger()
log.setLevel(logging.INFO)
Expand Down
3 changes: 2 additions & 1 deletion datalab/datalab_session/data_operations/rgb_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from astropy.io import fits

from datalab.datalab_session.data_operations.data_operation import BaseDataOperation
from datalab.datalab_session.util import get_fits, stack_arrays, create_fits, save_fits_and_thumbnails, create_jpgs
from datalab.datalab_session.file_utils import get_fits, stack_arrays, create_fits, create_jpgs
from datalab.datalab_session.s3_utils import save_fits_and_thumbnails

log = logging.getLogger()
log.setLevel(logging.INFO)
Expand Down
114 changes: 114 additions & 0 deletions datalab/datalab_session/file_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import tempfile
import logging

from astropy.io import fits
import numpy as np
from fits2image.conversions import fits_to_jpg, fits_to_tif

from datalab.datalab_session.s3_utils import get_fits, add_file_to_bucket

log = logging.getLogger()
log.setLevel(logging.INFO)

def get_hdu(basename: str, extension: str = 'SCI', source: str = 'archive'):
    """
    Return the named HDU (default 'SCI') for the given basename from the source.

    Downloads the file to a tmp directory (via get_fits) so future calls can
    open it directly.
    Warning: the returned HDU is backed by an open file that must be closed
    after use.

    Raises:
        KeyError: if the requested extension is not present in the FITS file.
    """

    basename_file_path = get_fits(basename, source)

    # keep the handle and the requested extension in separate names so the
    # parameter isn't shadowed and we can close the file on failure
    hdu_list = fits.open(basename_file_path)
    try:
        return hdu_list[extension]
    except KeyError:
        # close the open handle before propagating so it isn't leaked
        hdu_list.close()
        raise KeyError(f"{extension} Header not found in fits file {basename}")

def get_fits_dimensions(fits_file, extension: str = 'SCI') -> tuple:
    """Return the data shape tuple (e.g. (height, width)) of the given extension, closing the file afterwards."""
    # context manager fixes the original's leaked fits.open handle
    with fits.open(fits_file) as hdu_list:
        return hdu_list[extension].shape

def create_fits(key: str, image_arr: np.ndarray) -> str:
    """
    Create a FITS file holding image_arr in a 'SCI' image extension.

    The key is stored in the primary header (KEY card) and used in the temp
    filename.
    Returns the path to the fits file.
    """

    header = fits.Header([('KEY', key)])
    primary_hdu = fits.PrimaryHDU(header=header)
    image_hdu = fits.ImageHDU(data=image_arr, name='SCI')

    hdu_list = fits.HDUList([primary_hdu, image_hdu])
    # delete=False keeps the reserved path on disk and overwrite=True lets
    # writeto reuse it — avoids the create/delete/recreate race of the bare
    # NamedTemporaryFile(...).name idiom
    with tempfile.NamedTemporaryFile(suffix=f'{key}.fits', delete=False) as tmp:
        fits_path = tmp.name
    hdu_list.writeto(fits_path, overwrite=True)

    return fits_path

def create_tif(key: str, fits_path: str) -> str:
    """
    Create a full sized TIFF file from a FITS file.

    fits_path is a filesystem path to the FITS file (the original annotation
    of np.ndarray was incorrect).
    Returns the path to the TIFF file.
    """
    height, width = get_fits_dimensions(fits_path)
    tif_path = tempfile.NamedTemporaryFile(suffix=f'{key}.tif').name
    fits_to_tif(fits_path, tif_path, width=width, height=height)

    return tif_path

def create_jpgs(cache_key, fits_paths, color=False) -> tuple:
    """
    Create a full-size jpg and a small thumbnail jpg from one or more FITS files.

    fits_paths may be a single path or a list of paths; if using the color
    option fits_paths need to be in order R, G, B.

    Returns (large_jpg_path, thumbnail_jpg_path).
    """

    if not isinstance(fits_paths, list):
        fits_paths = [fits_paths]

    # reserve temp paths for the two output jpgs
    large_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-large.jpg').name
    thumbnail_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-small.jpg').name

    # NOTE(review): max() compares (height, width) tuples lexicographically, so
    # this takes the dimensions of the tallest image rather than an elementwise
    # maximum — fine when inputs share a shape; confirm for mixed-size inputs
    max_height, max_width = max(get_fits_dimensions(path) for path in fits_paths)

    fits_to_jpg(fits_paths, large_jpg_path, width=max_width, height=max_height, color=color)
    fits_to_jpg(fits_paths, thumbnail_jpg_path, color=color)

    return large_jpg_path, thumbnail_jpg_path

def stack_arrays(array_list: list):
    """
    Take a list of 2D numpy arrays, crop them to a common shape, and stack
    them into a 3D numpy array along a new last axis.

    Returns an array of shape (min_height, min_width, len(array_list)).
    """
    # Use the elementwise minimum of each dimension. The original used
    # min() over shape tuples, which is lexicographic and can pick a shape
    # (e.g. (2,5) from [(3,4), (2,5)]) that other arrays can't be cropped
    # to, making np.stack raise on mixed-size inputs.
    min_rows = min(arr.shape[0] for arr in array_list)
    min_cols = min(arr.shape[1] for arr in array_list)
    cropped_data_list = [arr[:min_rows, :min_cols] for arr in array_list]

    stacked = np.stack(cropped_data_list, axis=2)

    return stacked

def scale_points(height_1: int, width_1: int, height_2: int, width_2: int, x_points=(), y_points=(), flip_y=False, flip_x=False):
    """
    Scale x_points and y_points from an image of (height_1, width_1) to an
    image of (height_2, width_2), optionally flipping them on the x or y axis.

    Returns the scaled (x_points, y_points) as integer numpy arrays.

    Raises:
        ValueError: if any of the heights or widths is zero.
    """
    if any(dim == 0 for dim in (height_1, width_1, height_2, width_2)):
        raise ValueError("height and width must be non-zero")

    # accept lists, tuples, or arrays; immutable () defaults replace the
    # mutable [] defaults of the original (behavior is unchanged since the
    # values were never mutated, only converted)
    x_points = (np.asarray(x_points) / width_1 * width_2).astype(int)
    y_points = (np.asarray(y_points) / height_1 * height_2).astype(int)

    if flip_y:
        y_points = height_2 - y_points

    if flip_x:
        x_points = width_2 - x_points

    return x_points, y_points
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import tempfile
import requests
import logging
import requests
import os
import urllib.request

import boto3
from astropy.io import fits
import numpy as np
from botocore.exceptions import ClientError

from django.conf import settings
from fits2image.conversions import fits_to_jpg, fits_to_tif

log = logging.getLogger()
log.setLevel(logging.INFO)
Expand Down Expand Up @@ -115,7 +111,7 @@ def get_archive_url(basename: str, archive: str = settings.ARCHIVE_API) -> dict:

def get_fits(basename: str, source: str = 'archive'):
"""
Returns a Fits File for the given basename from the source
Returns a Fits File for the given basename from the source bucket
"""
basename = basename.replace('-large', '').replace('-small', '')
basename_file_path = os.path.join(settings.TEMP_FITS_DIR, basename)
Expand All @@ -140,76 +136,9 @@ def get_fits(basename: str, source: str = 'archive'):

return basename_file_path

def get_hdu(basename: str, extension: str = 'SCI', source: str = 'archive') -> list[fits.HDUList]:
    """
    Returns a HDU for the given basename from the source
    Will download the file to a tmp directory so future calls can open it directly
    Warning: this function returns an opened file that must be closed after use

    Raises:
        KeyError: if the requested extension is not present in the FITS file.
    """

    basename_file_path = get_fits(basename, source)

    hdu = fits.open(basename_file_path)
    try:
        # NOTE(review): rebinding 'extension' shadows the parameter, and the
        # open handle 'hdu' is never closed on the KeyError path below
        extension = hdu[extension]
    except KeyError:
        raise KeyError(f"{extension} Header not found in fits file {basename}")

    return extension

def get_fits_dimensions(fits_file, extension: str = 'SCI') -> tuple:
    """Return the data shape tuple of the given extension (NOTE(review): the fits.open handle is never closed)."""
    return fits.open(fits_file)[extension].shape

def create_fits(key: str, image_arr: np.ndarray) -> str:
    """
    Creates a fits file with the given key and image array
    The key is stored in the primary header and used in the temp filename
    Returns the path to the fits_file
    """

    header = fits.Header([('KEY', key)])
    primary_hdu = fits.PrimaryHDU(header=header)
    image_hdu = fits.ImageHDU(data=image_arr, name='SCI')

    hdu_list = fits.HDUList([primary_hdu, image_hdu])
    # NOTE(review): NamedTemporaryFile(...).name creates then immediately frees
    # the path (the object is garbage-collected) — racy but functional
    fits_path = tempfile.NamedTemporaryFile(suffix=f'{key}.fits').name
    hdu_list.writeto(fits_path)

    return fits_path

def create_tif(key: str, fits_path: str) -> str:
    """
    Creates a full sized TIFF file from a FITs
    fits_path is a filesystem path to the FITS file
    Returns the path to the TIFF file
    """
    height, width = get_fits_dimensions(fits_path)
    tif_path = tempfile.NamedTemporaryFile(suffix=f'{key}.tif').name
    fits_to_tif(fits_path, tif_path, width=width, height=height)

    return tif_path

def create_jpgs(cache_key, fits_paths, color=False) -> tuple:
    """
    Create a large jpg and a small thumbnail jpg from one or more fits files
    If using the color option fits_paths need to be in order R, G, B
    Returns (large_jpg_path, thumbnail_jpg_path)
    """

    if not isinstance(fits_paths, list):
        fits_paths = [fits_paths]

    # create the jpgs from the fits files
    large_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-large.jpg').name
    thumbnail_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-small.jpg').name

    # NOTE(review): max() over (height, width) tuples is lexicographic — picks
    # the tallest image's dims, not an elementwise max; confirm inputs share a shape
    max_height, max_width = max(get_fits_dimensions(path) for path in fits_paths)

    fits_to_jpg(fits_paths, large_jpg_path, width=max_width, height=max_height, color=color)
    fits_to_jpg(fits_paths, thumbnail_jpg_path, color=color)

    return large_jpg_path, thumbnail_jpg_path

def save_fits_and_thumbnails(cache_key, fits_path, large_jpg_path, thumbnail_jpg_path, index=None):
"""
Save Fits and Thumbnails in S3 Buckets, Returns the URLs in an output object
Save Fits and Thumbnails in S3 Buckets, Returns the URLs in an data operation output ready object
"""
bucket_key = f'{cache_key}/{cache_key}-{index}' if index else f'{cache_key}/{cache_key}'

Expand All @@ -226,38 +155,3 @@ def save_fits_and_thumbnails(cache_key, fits_path, large_jpg_path, thumbnail_jpg
)

return output_file

def stack_arrays(array_list: list):
    """
    Takes a list of numpy arrays, crops them to an equal shape, and stacks them to be a 3d numpy array
    """
    # NOTE(review): min() over shape tuples is lexicographic, not elementwise;
    # shapes like (3,4) and (2,5) crop to different sizes and np.stack raises
    min_shape = min(arr.shape for arr in array_list)
    cropped_data_list = [arr[:min_shape[0], :min_shape[1]] for arr in array_list]

    stacked = np.stack(cropped_data_list, axis=2)

    return stacked

def scale_points(height_1: int, width_1: int, height_2: int, width_2: int, x_points=[], y_points=[], flip_y = False, flip_x = False):
    """
    Scales x_points and y_points from img_1 height and width to img_2 height and width
    Optionally flips the points on the x or y axis
    Returns the scaled (x_points, y_points) as integer numpy arrays

    Raises:
        ValueError: if any of the heights or widths is zero
    """
    if any([dim == 0 for dim in [height_1, width_1, height_2, width_2]]):
        raise ValueError("height and width must be non-zero")

    # normalize the points to be lists in case tuples or other are passed
    # (the mutable [] defaults are harmless here only because they are
    # rebound, never mutated)
    x_points = np.array(x_points)
    y_points = np.array(y_points)

    x_points = (x_points / width_1 * width_2).astype(int)
    y_points = (y_points / height_1 * height_2).astype(int)

    if flip_y:
        y_points = height_2 - y_points

    if flip_x:
        x_points = width_2 - x_points

    return x_points, y_points

0 comments on commit 5eb56fc

Please sign in to comment.