From 8ac1c7c9778ef9d421c369f33c2b53acd1001ada Mon Sep 17 00:00:00 2001 From: Lloyd Dakin Date: Thu, 1 Aug 2024 13:20:30 -0700 Subject: [PATCH] split create_jpg_outputs into two functions, create_jpg, and save output, moved functions from data operations to util file --- .../data_operations/data_operation.py | 56 +------------------ .../datalab_session/data_operations/median.py | 8 ++- .../data_operations/rgb_stack.py | 16 ++++-- datalab/datalab_session/util.py | 44 ++++++++++++++- 4 files changed, 60 insertions(+), 64 deletions(-) diff --git a/datalab/datalab_session/data_operations/data_operation.py b/datalab/datalab_session/data_operations/data_operation.py index af8df83..ae93f5d 100644 --- a/datalab/datalab_session/data_operations/data_operation.py +++ b/datalab/datalab_session/data_operations/data_operation.py @@ -1,15 +1,12 @@ from abc import ABC, abstractmethod import hashlib import json -import tempfile from django.core.cache import cache -from fits2image.conversions import fits_to_jpg -from astropy.io import fits import numpy as np from datalab.datalab_session.tasks import execute_data_operation -from datalab.datalab_session.util import add_file_to_bucket, get_hdu, get_fits_dimensions, stack_arrays, create_fits +from datalab.datalab_session.util import get_hdu CACHE_DURATION = 60 * 60 * 24 * 30 # cache for 30 days @@ -99,57 +96,6 @@ def set_failed(self, message: str): self.set_status('FAILED') self.set_message(message) - def create_jpg_output(self, fits_paths: str, percent=None, cur_percent=None, color=False, index=None) -> list: - """ - Create jpgs from fits files and save them to S3 - If using the color option fits_paths need to be in order R, G, B - percent and cur_percent are used to update the progress of the operation - """ - - if not isinstance(fits_paths, list): - fits_paths = [fits_paths] - - # create the jpgs from the fits files - large_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{self.cache_key}-large.jpg').name - thumbnail_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{self.cache_key}-small.jpg').name - - max_height, max_width = max(get_fits_dimensions(path) for path in fits_paths) - - fits_to_jpg(fits_paths, large_jpg_path, width=max_width, height=max_height, color=color) - fits_to_jpg(fits_paths, thumbnail_jpg_path, color=color) - - # color photos take three files, so we store it as one fits file with a 3d SCI ndarray - if color: - arrays = [fits.open(file)['SCI'].data for file in fits_paths] - stacked_data = stack_arrays(arrays) - fits_file = create_fits(self.cache_key, stacked_data) - else: - fits_file = fits_paths[0] - - - # Save Fits and Thumbnails in S3 Buckets - bucket_key = f'{self.cache_key}/{self.cache_key}-{index}' if index else f'{self.cache_key}/{self.cache_key}' - - fits_url = add_file_to_bucket(f'{bucket_key}.fits', fits_file) - large_jpg_url = add_file_to_bucket(f'{bucket_key}-large.jpg', large_jpg_path) - thumbnail_jpg_url = add_file_to_bucket(f'{bucket_key}-small.jpg', thumbnail_jpg_path) - - output = [] - output.append({ - 'fits_url': fits_url, - 'large_url': large_jpg_url, - 'thumbnail_url': thumbnail_jpg_url, - 'basename': f'{self.cache_key}', - 'source': 'datalab'} - ) - - if percent is not None and cur_percent is not None: - self.set_percent_completion(cur_percent + percent) - else: - self.set_percent_completion(0.9) - - return output - def get_fits_npdata(self, input_files: list[dict], percent=None, cur_percent=None) -> list[np.memmap]: total_files = len(input_files) image_data_list = [] diff --git a/datalab/datalab_session/data_operations/median.py b/datalab/datalab_session/data_operations/median.py index 55e94fd..732d948 100644 --- a/datalab/datalab_session/data_operations/median.py +++ b/datalab/datalab_session/data_operations/median.py @@ -3,7 +3,7 @@ import numpy as np from datalab.datalab_session.data_operations.data_operation import BaseDataOperation -from datalab.datalab_session.util import create_fits, stack_arrays +from datalab.datalab_session.util import create_fits, stack_arrays, create_jpgs, save_fits_and_thumbnails log = logging.getLogger() log.setLevel(logging.INFO) @@ -54,9 +54,11 @@ def operate(self): fits_file = create_fits(self.cache_key, median) - output = self.create_jpg_output(fits_file, percent=0.6, cur_percent=0.4) + large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, fits_file) - output = {'output_files': output} + output_file = save_fits_and_thumbnails(self.cache_key, fits_file, large_jpg_path, small_jpg_path) + + output = {'output_files': [output_file]} else: output = {'output_files': []} diff --git a/datalab/datalab_session/data_operations/rgb_stack.py b/datalab/datalab_session/data_operations/rgb_stack.py index fa76094..6c8e8ce 100644 --- a/datalab/datalab_session/data_operations/rgb_stack.py +++ b/datalab/datalab_session/data_operations/rgb_stack.py @@ -1,10 +1,9 @@ import logging -import tempfile -from fits2image.conversions import fits_to_jpg +from astropy.io import fits from datalab.datalab_session.data_operations.data_operation import BaseDataOperation -from datalab.datalab_session.util import add_file_to_bucket, get_fits +from datalab.datalab_session.util import get_fits, stack_arrays, create_fits, save_fits_and_thumbnails, create_jpgs log = logging.getLogger() log.setLevel(logging.INFO) @@ -65,9 +64,16 @@ def operate(self): fits_paths.append(get_fits(file.get('basename'))) self.set_percent_completion(self.get_percent_completion() + 0.2) - output = self.create_jpg_output(fits_paths, percent=0.9, cur_percent=0.0, color=True) + large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, fits_paths, color=True) - output = {'output_files': output} + # color photos take three files, so we store it as one fits file with a 3d SCI ndarray + arrays = [fits.open(file)['SCI'].data for file in fits_paths] + stacked_data = stack_arrays(arrays) + fits_file = create_fits(self.cache_key, stacked_data) + + output_file = save_fits_and_thumbnails(self.cache_key, fits_file, large_jpg_path, small_jpg_path) + + output = {'output_files': [output_file]} else: output = {'output_files': []} raise ValueError('RGB Stack operation requires exactly 3 input files') diff --git a/datalab/datalab_session/util.py b/datalab/datalab_session/util.py index 8d43a73..97c0431 100644 --- a/datalab/datalab_session/util.py +++ b/datalab/datalab_session/util.py @@ -7,9 +7,10 @@ import boto3 from astropy.io import fits import numpy as np +from botocore.exceptions import ClientError from django.conf import settings -from botocore.exceptions import ClientError +from fits2image.conversions import fits_to_jpg log = logging.getLogger() log.setLevel(logging.INFO) @@ -175,6 +176,47 @@ def create_fits(key: str, image_arr: np.ndarray) -> str: return fits_path +def create_jpgs(cache_key, fits_paths: str, color=False) -> list: + """ + Create jpgs from fits files and save them to S3 + If using the color option fits_paths need to be in order R, G, B + percent and cur_percent are used to update the progress of the operation + """ + + if not isinstance(fits_paths, list): + fits_paths = [fits_paths] + + # create the jpgs from the fits files + large_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-large.jpg').name + thumbnail_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-small.jpg').name + + max_height, max_width = max(get_fits_dimensions(path) for path in fits_paths) + + fits_to_jpg(fits_paths, large_jpg_path, width=max_width, height=max_height, color=color) + fits_to_jpg(fits_paths, thumbnail_jpg_path, color=color) + + return large_jpg_path, thumbnail_jpg_path + +def save_fits_and_thumbnails(cache_key, fits_path, large_jpg_path, thumbnail_jpg_path, index=None): + """ + Save Fits and Thumbnails in S3 Buckets, Returns the URLs in an output object + """ + bucket_key = f'{cache_key}/{cache_key}-{index}' if index else f'{cache_key}/{cache_key}' + + fits_url = add_file_to_bucket(f'{bucket_key}.fits', fits_path) + large_jpg_url = add_file_to_bucket(f'{bucket_key}-large.jpg', large_jpg_path) + thumbnail_jpg_url = add_file_to_bucket(f'{bucket_key}-small.jpg', thumbnail_jpg_path) + + output_file = dict({ + 'fits_url': fits_url, + 'large_url': large_jpg_url, + 'thumbnail_url': thumbnail_jpg_url, + 'basename': f'{cache_key}', + 'source': 'datalab'} + ) + + return output_file + def stack_arrays(array_list: list): """ Takes a list of numpy arrays, crops them to an equal shape, and stacks them to be a 3d numpy array