diff --git a/datalab/datalab_session/analysis/get_tif.py b/datalab/datalab_session/analysis/get_tif.py
index b867e6b..bac6cbb 100644
--- a/datalab/datalab_session/analysis/get_tif.py
+++ b/datalab/datalab_session/analysis/get_tif.py
@@ -1,4 +1,5 @@
-from datalab.datalab_session.util import create_tif, get_fits, add_file_to_bucket, key_exists, get_s3_url
+from datalab.datalab_session.file_utils import create_tif, get_fits
+from datalab.datalab_session.s3_utils import key_exists, add_file_to_bucket, get_s3_url
 
 def get_tif(input: dict):
     """
diff --git a/datalab/datalab_session/analysis/line_profile.py b/datalab/datalab_session/analysis/line_profile.py
index ae86f79..05c7cb2 100644
--- a/datalab/datalab_session/analysis/line_profile.py
+++ b/datalab/datalab_session/analysis/line_profile.py
@@ -2,8 +2,7 @@
 from astropy.wcs import WCS
 from astropy.wcs import WcsError
 
-from datalab.datalab_session.util import scale_points
-from datalab.datalab_session.util import get_hdu
+from datalab.datalab_session.file_utils import scale_points, get_hdu
 
 # For creating an array of brightness along a user drawn line
 def line_profile(input: dict):
diff --git a/datalab/datalab_session/analysis/source_catalog.py b/datalab/datalab_session/analysis/source_catalog.py
index d7d56a8..c64b767 100644
--- a/datalab/datalab_session/analysis/source_catalog.py
+++ b/datalab/datalab_session/analysis/source_catalog.py
@@ -1,7 +1,6 @@
 import numpy as np
 
-from datalab.datalab_session.util import get_hdu, scale_points
-
+from datalab.datalab_session.file_utils import get_hdu, scale_points
 
 def source_catalog(input: dict):
     """
diff --git a/datalab/datalab_session/data_operations/data_operation.py b/datalab/datalab_session/data_operations/data_operation.py
index ae93f5d..5f9fc87 100644
--- a/datalab/datalab_session/data_operations/data_operation.py
+++ b/datalab/datalab_session/data_operations/data_operation.py
@@ -6,7 +6,7 @@
 import numpy as np
 
 from datalab.datalab_session.tasks import execute_data_operation
-from datalab.datalab_session.util import get_hdu
+from datalab.datalab_session.file_utils import get_hdu
 
 CACHE_DURATION = 60 * 60 * 24 * 30 # cache for 30 days
 
diff --git a/datalab/datalab_session/data_operations/median.py b/datalab/datalab_session/data_operations/median.py
index 732d948..0d98aff 100644
--- a/datalab/datalab_session/data_operations/median.py
+++ b/datalab/datalab_session/data_operations/median.py
@@ -3,7 +3,8 @@
 import numpy as np
 
 from datalab.datalab_session.data_operations.data_operation import BaseDataOperation
-from datalab.datalab_session.util import create_fits, stack_arrays, create_jpgs, save_fits_and_thumbnails
+from datalab.datalab_session.file_utils import create_fits, stack_arrays, create_jpgs
+from datalab.datalab_session.s3_utils import save_fits_and_thumbnails
 
 log = logging.getLogger()
 log.setLevel(logging.INFO)
diff --git a/datalab/datalab_session/data_operations/rgb_stack.py b/datalab/datalab_session/data_operations/rgb_stack.py
index 6c8e8ce..9682fc4 100644
--- a/datalab/datalab_session/data_operations/rgb_stack.py
+++ b/datalab/datalab_session/data_operations/rgb_stack.py
@@ -3,7 +3,8 @@
 from astropy.io import fits
 
 from datalab.datalab_session.data_operations.data_operation import BaseDataOperation
-from datalab.datalab_session.util import get_fits, stack_arrays, create_fits, save_fits_and_thumbnails, create_jpgs
+from datalab.datalab_session.file_utils import get_fits, stack_arrays, create_fits, create_jpgs
+from datalab.datalab_session.s3_utils import save_fits_and_thumbnails
 
 log = logging.getLogger()
 log.setLevel(logging.INFO)
diff --git a/datalab/datalab_session/file_utils.py b/datalab/datalab_session/file_utils.py
new file mode 100644
index 0000000..1a16962
--- /dev/null
+++ b/datalab/datalab_session/file_utils.py
@@ -0,0 +1,114 @@
+import tempfile
+import logging
+
+from astropy.io import fits
+import numpy as np
+from fits2image.conversions import fits_to_jpg, fits_to_tif
+
+from datalab.datalab_session.s3_utils import get_fits, add_file_to_bucket
+
+log = logging.getLogger()
+log.setLevel(logging.INFO)
+
+def get_hdu(basename: str, extension: str = 'SCI', source: str = 'archive') -> list[fits.HDUList]:
+    """
+    Returns a HDU for the given basename from the source
+    Will download the file to a tmp directory so future calls can open it directly
+    Warning: this function returns an opened file that must be closed after use
+    """
+
+    basename_file_path = get_fits(basename, source)
+
+    hdu = fits.open(basename_file_path)
+    try:
+        extension = hdu[extension]
+    except KeyError:
+        raise KeyError(f"{extension} Header not found in fits file {basename}")
+
+    return extension
+
+def get_fits_dimensions(fits_file, extension: str = 'SCI') -> tuple:
+    return fits.open(fits_file)[extension].shape
+
+def create_fits(key: str, image_arr: np.ndarray) -> str:
+    """
+    Creates a fits file with the given key and image array
+    Returns the the path to the fits_file
+    """
+
+    header = fits.Header([('KEY', key)])
+    primary_hdu = fits.PrimaryHDU(header=header)
+    image_hdu = fits.ImageHDU(data=image_arr, name='SCI')
+
+    hdu_list = fits.HDUList([primary_hdu, image_hdu])
+    fits_path = tempfile.NamedTemporaryFile(suffix=f'{key}.fits').name
+    hdu_list.writeto(fits_path)
+
+    return fits_path
+
+def create_tif(key: str, fits_path: np.ndarray) -> str:
+    """
+    Creates a full sized TIFF file from a FITs
+    Returns the path to the TIFF file
+    """
+    height, width = get_fits_dimensions(fits_path)
+    tif_path = tempfile.NamedTemporaryFile(suffix=f'{key}.tif').name
+    fits_to_tif(fits_path, tif_path, width=width, height=height)
+
+    return tif_path
+
+def create_jpgs(cache_key, fits_paths: str, color=False) -> list:
+    """
+    Create jpgs from fits files and save them to S3
+    If using the color option fits_paths need to be in order R, G, B
+    percent and cur_percent are used to update the progress of the operation
+    """
+
+    if not isinstance(fits_paths, list):
+        fits_paths = [fits_paths]
+
+    # create the jpgs from the fits files
+    large_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-large.jpg').name
+    thumbnail_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-small.jpg').name
+
+    max_height, max_width = max(get_fits_dimensions(path) for path in fits_paths)
+
+    fits_to_jpg(fits_paths, large_jpg_path, width=max_width, height=max_height, color=color)
+    fits_to_jpg(fits_paths, thumbnail_jpg_path, color=color)
+
+    return large_jpg_path, thumbnail_jpg_path
+
+def stack_arrays(array_list: list):
+    """
+    Takes a list of numpy arrays, crops them to an equal shape, and stacks them to be a 3d numpy array
+
+    """
+    min_shape = min(arr.shape for arr in array_list)
+    cropped_data_list = [arr[:min_shape[0], :min_shape[1]] for arr in array_list]
+
+    stacked = np.stack(cropped_data_list, axis=2)
+
+    return stacked
+
+def scale_points(height_1: int, width_1: int, height_2: int, width_2: int, x_points=[], y_points=[], flip_y = False, flip_x = False):
+    """
+    Scales x_points and y_points from img_1 height and width to img_2 height and width
+    Optionally flips the points on the x or y axis
+    """
+    if any([dim == 0 for dim in [height_1, width_1, height_2, width_2]]):
+        raise ValueError("height and width must be non-zero")
+
+    # normalize the points to be lists in case tuples or other are passed
+    x_points = np.array(x_points)
+    y_points = np.array(y_points)
+
+    x_points = (x_points / width_1 * width_2).astype(int)
+    y_points = (y_points / height_1 * height_2).astype(int)
+
+    if flip_y:
+        y_points = height_2 - y_points
+
+    if flip_x:
+        x_points = width_2 - x_points
+
+    return x_points, y_points
diff --git a/datalab/datalab_session/util.py b/datalab/datalab_session/s3_utils.py
similarity index 55%
rename from datalab/datalab_session/util.py
rename to datalab/datalab_session/s3_utils.py
index 625b3a6..a27b119 100644
--- a/datalab/datalab_session/util.py
+++ b/datalab/datalab_session/s3_utils.py
@@ -1,16 +1,12 @@
-import tempfile
-import requests
 import logging
+import requests
 import os
 import urllib.request
 
 import boto3
-from astropy.io import fits
-import numpy as np
 from botocore.exceptions import ClientError
 from django.conf import settings
-from fits2image.conversions import fits_to_jpg, fits_to_tif
 
 log = logging.getLogger()
 log.setLevel(logging.INFO)
@@ -115,7 +111,7 @@ def get_archive_url(basename: str, archive: str = settings.ARCHIVE_API) -> dict:
 
 def get_fits(basename: str, source: str = 'archive'):
     """
-    Returns a Fits File for the given basename from the source
+    Returns a Fits File for the given basename from the source bucket
     """
     basename = basename.replace('-large', '').replace('-small', '')
     basename_file_path = os.path.join(settings.TEMP_FITS_DIR, basename)
@@ -140,76 +136,9 @@ def get_fits(basename: str, source: str = 'archive'):
 
     return basename_file_path
 
-def get_hdu(basename: str, extension: str = 'SCI', source: str = 'archive') -> list[fits.HDUList]:
-    """
-    Returns a HDU for the given basename from the source
-    Will download the file to a tmp directory so future calls can open it directly
-    Warning: this function returns an opened file that must be closed after use
-    """
-
-    basename_file_path = get_fits(basename, source)
-
-    hdu = fits.open(basename_file_path)
-    try:
-        extension = hdu[extension]
-    except KeyError:
-        raise KeyError(f"{extension} Header not found in fits file {basename}")
-
-    return extension
-
-def get_fits_dimensions(fits_file, extension: str = 'SCI') -> tuple:
-    return fits.open(fits_file)[extension].shape
-
-def create_fits(key: str, image_arr: np.ndarray) -> str:
-    """
-    Creates a fits file with the given key and image array
-    Returns the the path to the fits_file
-    """
-
-    header = fits.Header([('KEY', key)])
-    primary_hdu = fits.PrimaryHDU(header=header)
-    image_hdu = fits.ImageHDU(data=image_arr, name='SCI')
-
-    hdu_list = fits.HDUList([primary_hdu, image_hdu])
-    fits_path = tempfile.NamedTemporaryFile(suffix=f'{key}.fits').name
-    hdu_list.writeto(fits_path)
-
-    return fits_path
-
-def create_tif(key: str, fits_path: np.ndarray) -> str:
-    """
-    Creates a full sized TIFF file from a FITs
-    """
-    height, width = get_fits_dimensions(fits_path)
-    tif_path = tempfile.NamedTemporaryFile(suffix=f'{key}.tif').name
-    fits_to_tif(fits_path, tif_path, width=width, height=height)
-
-    return tif_path
-
-def create_jpgs(cache_key, fits_paths: str, color=False) -> list:
-    """
-    Create jpgs from fits files and save them to S3
-    If using the color option fits_paths need to be in order R, G, B
-    percent and cur_percent are used to update the progress of the operation
-    """
-
-    if not isinstance(fits_paths, list):
-        fits_paths = [fits_paths]
-
-    # create the jpgs from the fits files
-    large_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-large.jpg').name
-    thumbnail_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-small.jpg').name
-
-    max_height, max_width = max(get_fits_dimensions(path) for path in fits_paths)
-
-    fits_to_jpg(fits_paths, large_jpg_path, width=max_width, height=max_height, color=color)
-    fits_to_jpg(fits_paths, thumbnail_jpg_path, color=color)
-
-    return large_jpg_path, thumbnail_jpg_path
-
 def save_fits_and_thumbnails(cache_key, fits_path, large_jpg_path, thumbnail_jpg_path, index=None):
     """
-    Save Fits and Thumbnails in S3 Buckets, Returns the URLs in an output object
+    Save Fits and Thumbnails in S3 Buckets, Returns the URLs in an data operation output ready object
    """
 
     bucket_key = f'{cache_key}/{cache_key}-{index}' if index else f'{cache_key}/{cache_key}'
@@ -226,38 +155,3 @@ def save_fits_and_thumbnails(cache_key, fits_path, large_jpg_path, thumbnail_jpg
     )
 
     return output_file
-
-def stack_arrays(array_list: list):
-    """
-    Takes a list of numpy arrays, crops them to an equal shape, and stacks them to be a 3d numpy array
-
-    """
-    min_shape = min(arr.shape for arr in array_list)
-    cropped_data_list = [arr[:min_shape[0], :min_shape[1]] for arr in array_list]
-
-    stacked = np.stack(cropped_data_list, axis=2)
-
-    return stacked
-
-def scale_points(height_1: int, width_1: int, height_2: int, width_2: int, x_points=[], y_points=[], flip_y = False, flip_x = False):
-    """
-    Scales x_points and y_points from img_1 height and width to img_2 height and width
-    Optionally flips the points on the x or y axis
-    """
-    if any([dim == 0 for dim in [height_1, width_1, height_2, width_2]]):
-        raise ValueError("height and width must be non-zero")
-
-    # normalize the points to be lists in case tuples or other are passed
-    x_points = np.array(x_points)
-    y_points = np.array(y_points)
-
-    x_points = (x_points / width_1 * width_2).astype(int)
-    y_points = (y_points / height_1 * height_2).astype(int)
-
-    if flip_y:
-        y_points = height_2 - y_points
-
-    if flip_x:
-        x_points = width_2 - x_points
-
-    return x_points, y_points
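A rough usage sketch, not part of this patch: it shows how a data operation such as median.py might wire the refactored helpers together after the split, based only on the imports visible in the diff (create_fits, create_jpgs, get_hdu, stack_arrays from file_utils; save_fits_and_thumbnails from s3_utils). The function name, the basenames argument, and the control flow below are illustrative assumptions, not code from this repository.

# Usage sketch only -- assumes the helper signatures shown in the diff above;
# median_like_operation and its inputs are hypothetical.
import numpy as np

from datalab.datalab_session.file_utils import create_fits, create_jpgs, get_hdu, stack_arrays
from datalab.datalab_session.s3_utils import save_fits_and_thumbnails


def median_like_operation(cache_key: str, basenames: list):
    """Median-combine the SCI extensions of the inputs and store the result with previews."""
    # get_hdu fetches each FITS (downloading it if needed) and returns its SCI HDU;
    # per its docstring the underlying file stays open, so a real operation should
    # arrange to close those files once the pixel data has been consumed.
    sci_data = [get_hdu(basename).data for basename in basenames]

    # Crop to a common shape, stack along axis 2, then collapse to a single median image.
    stacked = stack_arrays(sci_data)
    median_image = np.median(stacked, axis=2)

    # Write a FITS file plus large/thumbnail JPGs to temp storage (file_utils)...
    fits_path = create_fits(cache_key, median_image)
    large_jpg_path, thumbnail_jpg_path = create_jpgs(cache_key, fits_path)

    # ...then upload everything to S3 and return the data-operation output object (s3_utils).
    return save_fits_and_thumbnails(cache_key, fits_path, large_jpg_path, thumbnail_jpg_path)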