Skip to content

Commit

Permalink
split create_jpg_outputs into two functions, create_jpg, and save out…
Browse files Browse the repository at this point in the history
…put, moved functions from data operations to util file
  • Loading branch information
LTDakin committed Aug 1, 2024
1 parent e74c634 commit 8ac1c7c
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 64 deletions.
56 changes: 1 addition & 55 deletions datalab/datalab_session/data_operations/data_operation.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
from abc import ABC, abstractmethod
import hashlib
import json
import tempfile

from django.core.cache import cache
from fits2image.conversions import fits_to_jpg
from astropy.io import fits
import numpy as np

from datalab.datalab_session.tasks import execute_data_operation
from datalab.datalab_session.util import add_file_to_bucket, get_hdu, get_fits_dimensions, stack_arrays, create_fits
from datalab.datalab_session.util import get_hdu

CACHE_DURATION = 60 * 60 * 24 * 30 # cache for 30 days

Expand Down Expand Up @@ -99,57 +96,6 @@ def set_failed(self, message: str):
self.set_status('FAILED')
self.set_message(message)

def create_jpg_output(self, fits_paths: 'str | list[str]', percent=None, cur_percent=None, color=False, index=None) -> list:
    """
    Create a large and a thumbnail JPG from FITS files, upload them together
    with a FITS file, and report progress.

    Args:
        fits_paths: Path to a FITS file, or a list of paths. With
            ``color=True`` the paths need to be in order R, G, B.
        percent: Progress increment to add once the uploads are done.
        cur_percent: Progress value the increment is added to. When either
            ``percent`` or ``cur_percent`` is None, progress is set to 0.9.
        color: Forwarded to ``fits_to_jpg``; also switches the uploaded FITS
            file to a single file built from the stacked 'SCI' arrays.
        index: Optional suffix appended to the bucket key.

    Returns:
        A one-element list holding a dict with the keys ``fits_url``,
        ``large_url``, ``thumbnail_url``, ``basename`` and ``source``.
    """
    if not isinstance(fits_paths, list):
        fits_paths = [fits_paths]

    # Reserve the temporary paths with delete=False and close the handles
    # right away. Taking `.name` from a throwaway NamedTemporaryFile removes
    # the file as soon as the object is garbage collected, leaving only an
    # unreserved name that another process could claim before it is written.
    with tempfile.NamedTemporaryFile(suffix=f'{self.cache_key}-large.jpg', delete=False) as large_file:
        large_jpg_path = large_file.name
    with tempfile.NamedTemporaryFile(suffix=f'{self.cache_key}-small.jpg', delete=False) as thumbnail_file:
        thumbnail_jpg_path = thumbnail_file.name

    # NOTE(review): max() over tuples compares lexicographically, so this
    # picks the dimensions of the entry with the largest first element rather
    # than the largest height and largest width independently - confirm.
    max_height, max_width = max(get_fits_dimensions(path) for path in fits_paths)

    fits_to_jpg(fits_paths, large_jpg_path, width=max_width, height=max_height, color=color)
    fits_to_jpg(fits_paths, thumbnail_jpg_path, color=color)

    # color photos take three files, so we store it as one fits file with a 3d SCI ndarray
    if color:
        # NOTE(review): the opened FITS handles are never closed explicitly.
        arrays = [fits.open(file)['SCI'].data for file in fits_paths]
        stacked_data = stack_arrays(arrays)
        fits_file = create_fits(self.cache_key, stacked_data)
    else:
        fits_file = fits_paths[0]

    # NOTE(review): this is a truthiness test, so an index of 0 produces the
    # same key as passing no index at all - confirm that is intended.
    bucket_key = f'{self.cache_key}/{self.cache_key}-{index}' if index else f'{self.cache_key}/{self.cache_key}'

    fits_url = add_file_to_bucket(f'{bucket_key}.fits', fits_file)
    large_jpg_url = add_file_to_bucket(f'{bucket_key}-large.jpg', large_jpg_path)
    thumbnail_jpg_url = add_file_to_bucket(f'{bucket_key}-small.jpg', thumbnail_jpg_path)

    output = [{
        'fits_url': fits_url,
        'large_url': large_jpg_url,
        'thumbnail_url': thumbnail_jpg_url,
        'basename': f'{self.cache_key}',
        'source': 'datalab',
    }]

    if percent is not None and cur_percent is not None:
        self.set_percent_completion(cur_percent + percent)
    else:
        self.set_percent_completion(0.9)

    return output

def get_fits_npdata(self, input_files: list[dict], percent=None, cur_percent=None) -> list[np.memmap]:
total_files = len(input_files)
image_data_list = []
Expand Down
8 changes: 5 additions & 3 deletions datalab/datalab_session/data_operations/median.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np

from datalab.datalab_session.data_operations.data_operation import BaseDataOperation
from datalab.datalab_session.util import create_fits, stack_arrays
from datalab.datalab_session.util import create_fits, stack_arrays, create_jpgs, save_fits_and_thumbnails

log = logging.getLogger()
log.setLevel(logging.INFO)
Expand Down Expand Up @@ -54,9 +54,11 @@ def operate(self):

fits_file = create_fits(self.cache_key, median)

output = self.create_jpg_output(fits_file, percent=0.6, cur_percent=0.4)
large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, fits_file)

output = {'output_files': output}
output_file = save_fits_and_thumbnails(self.cache_key, fits_file, large_jpg_path, small_jpg_path)

output = {'output_files': [output_file]}
else:
output = {'output_files': []}

Expand Down
16 changes: 11 additions & 5 deletions datalab/datalab_session/data_operations/rgb_stack.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import logging
import tempfile

from fits2image.conversions import fits_to_jpg
from astropy.io import fits

from datalab.datalab_session.data_operations.data_operation import BaseDataOperation
from datalab.datalab_session.util import add_file_to_bucket, get_fits
from datalab.datalab_session.util import get_fits, stack_arrays, create_fits, save_fits_and_thumbnails, create_jpgs

log = logging.getLogger()
log.setLevel(logging.INFO)
Expand Down Expand Up @@ -65,9 +64,16 @@ def operate(self):
fits_paths.append(get_fits(file.get('basename')))
self.set_percent_completion(self.get_percent_completion() + 0.2)

output = self.create_jpg_output(fits_paths, percent=0.9, cur_percent=0.0, color=True)
large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, fits_paths, color=True)

output = {'output_files': output}
# color photos take three files, so we store it as one fits file with a 3d SCI ndarray
arrays = [fits.open(file)['SCI'].data for file in fits_paths]
stacked_data = stack_arrays(arrays)
fits_file = create_fits(self.cache_key, stacked_data)

output_file = save_fits_and_thumbnails(self.cache_key, fits_file, large_jpg_path, small_jpg_path)

output = {'output_files': [output_file]}
else:
output = {'output_files': []}
raise ValueError('RGB Stack operation requires exactly 3 input files')
Expand Down
44 changes: 43 additions & 1 deletion datalab/datalab_session/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
import boto3
from astropy.io import fits
import numpy as np
from botocore.exceptions import ClientError

from django.conf import settings
from botocore.exceptions import ClientError
from fits2image.conversions import fits_to_jpg

log = logging.getLogger()
log.setLevel(logging.INFO)
Expand Down Expand Up @@ -175,6 +176,47 @@ def create_fits(key: str, image_arr: np.ndarray) -> str:

return fits_path

def create_jpgs(cache_key, fits_paths: 'str | list[str]', color=False) -> tuple[str, str]:
    """
    Create a large JPG and a thumbnail JPG from one or more FITS files.

    Nothing is uploaded here: both JPGs are written to local temporary files
    that stay on disk, so the caller is responsible for uploading and
    removing them.

    Args:
        cache_key: Identifier embedded in the temporary file names.
        fits_paths: Path to a FITS file, or a list of paths. With
            ``color=True`` the paths need to be in order R, G, B.
        color: Forwarded to ``fits_to_jpg``.

    Returns:
        ``(large_jpg_path, thumbnail_jpg_path)``.
    """
    if not isinstance(fits_paths, list):
        fits_paths = [fits_paths]

    # Reserve the temporary paths with delete=False and close the handles
    # right away. Taking `.name` from a throwaway NamedTemporaryFile removes
    # the file as soon as the object is garbage collected, leaving only an
    # unreserved name that another process could claim before it is written.
    with tempfile.NamedTemporaryFile(suffix=f'{cache_key}-large.jpg', delete=False) as large_file:
        large_jpg_path = large_file.name
    with tempfile.NamedTemporaryFile(suffix=f'{cache_key}-small.jpg', delete=False) as thumbnail_file:
        thumbnail_jpg_path = thumbnail_file.name

    # NOTE(review): max() over tuples compares lexicographically, so this
    # picks the dimensions of the entry with the largest first element rather
    # than the largest height and largest width independently - confirm.
    max_height, max_width = max(get_fits_dimensions(path) for path in fits_paths)

    # The large JPG is rendered at the FITS dimensions; the thumbnail uses
    # whatever default size fits_to_jpg applies.
    fits_to_jpg(fits_paths, large_jpg_path, width=max_width, height=max_height, color=color)
    fits_to_jpg(fits_paths, thumbnail_jpg_path, color=color)

    return large_jpg_path, thumbnail_jpg_path

def save_fits_and_thumbnails(cache_key, fits_path, large_jpg_path, thumbnail_jpg_path, index=None):
    """
    Upload a FITS file and its two JPGs through add_file_to_bucket and
    return a dict describing the uploaded output.

    The three uploads share one key prefix, '<cache_key>/<cache_key>', with
    '-<index>' appended when a truthy index is given. The returned dict holds
    the three values returned by add_file_to_bucket plus the basename
    (the cache key) and the fixed source 'datalab'.
    """
    key_prefix = f'{cache_key}/{cache_key}'
    # NOTE(review): this is a truthiness test, so an index of 0 is treated
    # the same as no index - confirm that is intended.
    if index:
        key_prefix = f'{key_prefix}-{index}'

    # Dict entries are evaluated top to bottom, so the upload order stays
    # FITS, large JPG, thumbnail JPG.
    return {
        'fits_url': add_file_to_bucket(f'{key_prefix}.fits', fits_path),
        'large_url': add_file_to_bucket(f'{key_prefix}-large.jpg', large_jpg_path),
        'thumbnail_url': add_file_to_bucket(f'{key_prefix}-small.jpg', thumbnail_jpg_path),
        'basename': f'{cache_key}',
        'source': 'datalab',
    }

def stack_arrays(array_list: list):
"""
Takes a list of numpy arrays, crops them to an equal shape, and stacks them to be a 3d numpy array
Expand Down

0 comments on commit 8ac1c7c

Please sign in to comment.