From 80fcb8041f7f16f4cb64e8fd9711dbfcadee8dba Mon Sep 17 00:00:00 2001 From: Lloyd Dakin Date: Wed, 9 Oct 2024 14:28:42 -0700 Subject: [PATCH 1/4] fits_file_reader and output_handler classes, changes to operations, tests --- .../data_operations/fits_file_reader.py | 25 +++++++++++++ .../data_operations/fits_output_handler.py | 35 ++++++++++++++++++ .../datalab_session/data_operations/median.py | 32 ++++++++-------- .../data_operations/normalization.py | 23 ++++++------ .../data_operations/rgb_stack.py | 35 ++++++++---------- .../data_operations/stacking.py | 26 ++++++------- .../data_operations/subtraction.py | 33 ++++++++--------- .../tests/test_files/median/median_1_2.fits | Bin 46080 -> 46080 bytes .../tests/test_files/rgb_stack/rgb_stack.fits | Bin 126720 -> 126720 bytes .../datalab_session/tests/test_operations.py | 18 ++++----- 10 files changed, 140 insertions(+), 87 deletions(-) create mode 100644 datalab/datalab_session/data_operations/fits_file_reader.py create mode 100644 datalab/datalab_session/data_operations/fits_output_handler.py diff --git a/datalab/datalab_session/data_operations/fits_file_reader.py b/datalab/datalab_session/data_operations/fits_file_reader.py new file mode 100644 index 0000000..0ec6f2b --- /dev/null +++ b/datalab/datalab_session/data_operations/fits_file_reader.py @@ -0,0 +1,25 @@ +from astropy.io import fits + +from datalab.datalab_session.s3_utils import get_fits +from datalab.datalab_session.file_utils import get_hdu + +class FITSFileReader: + + basename = None + fits_file = None + hdu_list = None + + def __init__(self, basename: str, source: str = None) -> None: + self.basename = basename + self.fits_file = get_fits(basename, source) + self.hdu_list = fits.open(self.fits_file) + + def __str__(self) -> str: + return f"{self.basename}@{self.fits_file}\nHDU List\n{self.hdu_list.info()}" + + @property + def sci_data(self): + return self.hdu_list['SCI'].data + + def hdu(self, extension: str): + return get_hdu(self.fits_file, 
extension) diff --git a/datalab/datalab_session/data_operations/fits_output_handler.py b/datalab/datalab_session/data_operations/fits_output_handler.py new file mode 100644 index 0000000..0a14710 --- /dev/null +++ b/datalab/datalab_session/data_operations/fits_output_handler.py @@ -0,0 +1,35 @@ +import tempfile +import numpy as np +from astropy.io import fits + +from datalab.datalab_session.file_utils import create_jpgs +from datalab.datalab_session.s3_utils import save_fits_and_thumbnails + + +class FITSOutputHandler(): + + def __init__(self, key: str, data: np.array, comment: str=None) -> None: + self.key = key + self.primary_hdu = fits.PrimaryHDU(header=fits.Header([('KEY', key)])) + self.image_hdu = fits.ImageHDU(data=data, name='SCI') + if comment: self.set_comment(comment) + + def __str__(self) -> str: + return f"Key: {self.key}\nData:\n{self.data}" + + def set_comment(self, comment: str): + self.primary_hdu.header.add_comment(comment) + + def set_sci_data(self, new_data: np.array): + self.image_hdu.data = new_data + + def create_save_fits(self, index: int=None, large_jpg: str=None, small_jpg: str=None): + hdu_list = fits.HDUList([self.primary_hdu, self.image_hdu]) + fits_output_path = tempfile.NamedTemporaryFile(suffix=f'{self.key}.fits').name + hdu_list.writeto(fits_output_path, overwrite=True) + + # allow for operations to pregenerate the jpgs, ex. 
RGB stacking + if not large_jpg or not small_jpg: + large_jpg, small_jpg = create_jpgs(self.key, fits_output_path) + + return save_fits_and_thumbnails(self.key, fits_output_path, large_jpg, small_jpg, index) diff --git a/datalab/datalab_session/data_operations/median.py b/datalab/datalab_session/data_operations/median.py index a8198f5..508ed5b 100644 --- a/datalab/datalab_session/data_operations/median.py +++ b/datalab/datalab_session/data_operations/median.py @@ -2,9 +2,11 @@ import numpy as np +from datalab.datalab_session.data_operations.fits_file_reader import FITSFileReader +from datalab.datalab_session.data_operations.fits_output_handler import FITSOutputHandler from datalab.datalab_session.data_operations.data_operation import BaseDataOperation from datalab.datalab_session.exceptions import ClientAlertException -from datalab.datalab_session.file_utils import crop_arrays, create_output +from datalab.datalab_session.file_utils import crop_arrays log = logging.getLogger() log.setLevel(logging.INFO) @@ -40,25 +42,21 @@ def wizard_description(): } def operate(self): + # Getting/Checking the Input + input_list = self.input_data.get('input_files', []) + if len(input_list) <= 1: raise ClientAlertException('Median needs at least 2 files') + comment = f'Datalab Median on {", ".join([image["basename"] for image in input_list])}' + log.info(comment) - input = self.input_data.get('input_files', []) + input_FITS_list = [FITSFileReader(input['basename'], input['source']) for input in input_list] - if len(input) <= 1: - raise ClientAlertException('Median needs at least 2 files') - - log.info(f'Executing median operation on {len(input)} files') - - image_data_list = self.get_fits_npdata(input) - - cropped_data_list = crop_arrays(image_data_list) - stacked_data = np.stack(cropped_data_list, axis=2) - - # using the numpy library's median method - median = np.median(stacked_data, axis=2) + # Creating the Median array + cropped_data = crop_arrays([image.sci_data for image in 
input_FITS_list]) + stacked_ndarray = np.stack(cropped_data, axis=2) + median = np.median(stacked_ndarray, axis=2) self.set_operation_progress(0.80) - output = create_output(self.cache_key, median, comment=f'Product of Datalab Median on files {", ".join([image["basename"] for image in input])}') - + output = FITSOutputHandler(self.cache_key, median, comment).create_save_fits() + log.info(f'Median output: {output}') self.set_output(output) - log.info(f'Median output: {self.get_output()}') diff --git a/datalab/datalab_session/data_operations/normalization.py b/datalab/datalab_session/data_operations/normalization.py index 803fb99..2c07316 100644 --- a/datalab/datalab_session/data_operations/normalization.py +++ b/datalab/datalab_session/data_operations/normalization.py @@ -2,8 +2,9 @@ import numpy as np +from datalab.datalab_session.data_operations.fits_file_reader import FITSFileReader from datalab.datalab_session.data_operations.data_operation import BaseDataOperation -from datalab.datalab_session.file_utils import create_output +from datalab.datalab_session.data_operations.fits_output_handler import FITSOutputHandler log = logging.getLogger() log.setLevel(logging.INFO) @@ -40,20 +41,20 @@ def wizard_description(): def operate(self): - input = self.input_data.get('input_files', []) + input_list = self.input_data.get('input_files', []) + log.info(f'Normalization operation on {len(input_list)} file(s)') - log.info(f'Executing normalization operation on {len(input)} file(s)') - - image_data_list = self.get_fits_npdata(input) + input_FITS_list = [FITSFileReader(input['basename'], input['source']) for input in input_list] output_files = [] - for index, image in enumerate(image_data_list, start=1): - median = np.median(image) - normalized_image = image / median + for index, image in enumerate(input_FITS_list, start=1): + median = np.median(image.sci_data) + normalized_image = image.sci_data / median - output = create_output(self.cache_key, normalized_image, index=index, 
comment=f'Product of Datalab Normalization on file {input[index-1]["basename"]}') + comment = f'Datalab Normalization on file {input_list[index-1]["basename"]}' + output = FITSOutputHandler(f'{self.cache_key}', normalized_image, comment).create_save_fits(index=index) output_files.append(output) - self.set_operation_progress(0.5 + index/len(image_data_list) * 0.4) + self.set_operation_progress(0.5 + index/len(input_FITS_list) * 0.4) + log.info(f'Normalization output: {output_files}') self.set_output(output_files) - log.info(f'Normalization output: {self.get_output()}') diff --git a/datalab/datalab_session/data_operations/rgb_stack.py b/datalab/datalab_session/data_operations/rgb_stack.py index 51b1cf4..ea4a521 100644 --- a/datalab/datalab_session/data_operations/rgb_stack.py +++ b/datalab/datalab_session/data_operations/rgb_stack.py @@ -3,10 +3,11 @@ from astropy.io import fits import numpy as np +from datalab.datalab_session.data_operations.fits_file_reader import FITSFileReader from datalab.datalab_session.data_operations.data_operation import BaseDataOperation +from datalab.datalab_session.data_operations.fits_output_handler import FITSOutputHandler from datalab.datalab_session.exceptions import ClientAlertException -from datalab.datalab_session.file_utils import create_output, crop_arrays, create_jpgs -from datalab.datalab_session.s3_utils import get_fits +from datalab.datalab_session.file_utils import crop_arrays, create_jpgs log = logging.getLogger() log.setLevel(logging.INFO) @@ -58,28 +59,24 @@ def wizard_description(): def operate(self): rgb_input_list = self.input_data['red_input'] + self.input_data['green_input'] + self.input_data['blue_input'] + if len(rgb_input_list) != 3: raise ClientAlertException('RGB stack requires exactly 3 files') + rgb_comment = f'Datalab RGB Stack on files {", ".join([image["basename"] for image in rgb_input_list])}' + log.info(rgb_comment) - if len(rgb_input_list) != 3: - raise ClientAlertException('RGB stack requires exactly 3 
files') - - log.info(f'Executing RGB Stack operation on files: {rgb_input_list}') + input_FITS_list = [FITSFileReader(input['basename'], input['source']) for input in rgb_input_list] + self.set_operation_progress(0.4) - fits_paths = [] - for index, file in enumerate(rgb_input_list, start=1): - fits_paths.append(get_fits(file.get('basename'))) - self.set_operation_progress(index * 0.2) - - large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, fits_paths, color=True) + fits_file_list = [image.fits_file for image in input_FITS_list] + large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, fits_file_list, color=True) + self.set_operation_progress(0.6) # color photos take three files, so we store it as one fits file with a 3d SCI ndarray - arrays = [fits.open(file)['SCI'].data for file in fits_paths] - cropped_data_list = crop_arrays(arrays) - stacked_data = np.stack(cropped_data_list, axis=2) - + sci_data_list = [image.sci_data for image in input_FITS_list] + cropped_data_list = crop_arrays(sci_data_list) + stacked_ndarray = np.stack(cropped_data_list, axis=2) self.set_operation_progress(0.8) - rgb_comment = f'Product of Datalab RGB Stack on files {", ".join([image["basename"] for image in rgb_input_list])}' - output = create_output(self.cache_key, stacked_data, large_jpg=large_jpg_path, small_jpg=small_jpg_path, comment=rgb_comment) + output = FITSOutputHandler(self.cache_key, stacked_ndarray, rgb_comment).create_save_fits(large_jpg=large_jpg_path, small_jpg=small_jpg_path) + log.info(f'RGB Stack output: {output}') self.set_output(output) - log.info(f'RGB Stack output: {self.get_output()}') diff --git a/datalab/datalab_session/data_operations/stacking.py b/datalab/datalab_session/data_operations/stacking.py index 2d80509..31d87e7 100644 --- a/datalab/datalab_session/data_operations/stacking.py +++ b/datalab/datalab_session/data_operations/stacking.py @@ -2,9 +2,11 @@ import numpy as np +from datalab.datalab_session.data_operations.fits_file_reader import 
FITSFileReader +from datalab.datalab_session.data_operations.fits_output_handler import FITSOutputHandler from datalab.datalab_session.data_operations.data_operation import BaseDataOperation from datalab.datalab_session.exceptions import ClientAlertException -from datalab.datalab_session.file_utils import create_output, crop_arrays +from datalab.datalab_session.file_utils import crop_arrays log = logging.getLogger() log.setLevel(logging.INFO) @@ -43,24 +45,22 @@ def wizard_description(): def operate(self): input_files = self.input_data.get('input_files', []) + if len(input_files) <= 1: raise ClientAlertException('Stack needs at least 2 files') + comment= f'Datalab Stacking on {", ".join([image["basename"] for image in input_files])}' + log.info(comment) - if len(input_files) <= 1: - raise ClientAlertException('Stack needs at least 2 files') + input_FITS_list = [FITSFileReader(input['basename'], input['source']) for input in input_files] + self.set_operation_progress(0.4) - log.info(f'Executing stacking operation on {len(input_files)} files') - - image_data_list = self.get_fits_npdata(input_files) - - cropped_data = crop_arrays(image_data_list) - stacked_data = np.stack(cropped_data, axis=2) + cropped_data = crop_arrays([image.sci_data for image in input_FITS_list]) + stacked_ndarray = np.stack(cropped_data, axis=2) self.set_operation_progress(0.6) # using the numpy library's sum method - stacked_sum = np.sum(stacked_data, axis=2) + stacked_sum = np.sum(stacked_ndarray, axis=2) self.set_operation_progress(0.8) - stacking_comment = f'Product of Datalab Stacking. 
Stack of {", ".join([image["basename"] for image in input_files])}' - output = create_output(self.cache_key, stacked_sum, comment=stacking_comment) + output = FITSOutputHandler(self.cache_key, stacked_sum, comment).create_save_fits() + log.info(f'Stacked output: {output}') self.set_output(output) - log.info(f'Stacked output: {self.get_output()}') diff --git a/datalab/datalab_session/data_operations/subtraction.py b/datalab/datalab_session/data_operations/subtraction.py index e171cc1..6534e91 100644 --- a/datalab/datalab_session/data_operations/subtraction.py +++ b/datalab/datalab_session/data_operations/subtraction.py @@ -2,9 +2,11 @@ import numpy as np +from datalab.datalab_session.data_operations.fits_file_reader import FITSFileReader from datalab.datalab_session.data_operations.data_operation import BaseDataOperation +from datalab.datalab_session.data_operations.fits_output_handler import FITSOutputHandler from datalab.datalab_session.exceptions import ClientAlertException -from datalab.datalab_session.file_utils import crop_arrays, create_output +from datalab.datalab_session.file_utils import crop_arrays log = logging.getLogger() log.setLevel(logging.INFO) @@ -52,30 +54,25 @@ def operate(self): input_files = self.input_data.get('input_files', []) subtraction_file_input = self.input_data.get('subtraction_file', []) - if not subtraction_file_input: - raise ClientAlertException('Missing a subtraction file') + if not subtraction_file_input: raise ClientAlertException('Missing a subtraction file') + if len(input_files) < 1: raise ClientAlertException('Need at least one input file') - if len(input_files) < 1: - raise ClientAlertException('Need at least one input file') + log.info(f'Subtraction operation on {len(input_files)} files') - log.info(f'Executing subtraction operation on {len(input_files)} files') - - input_image_data_list = self.get_fits_npdata(input_files) - - subtraction_image = self.get_fits_npdata(subtraction_file_input)[0] - 
self.set_operation_progress(0.70) + input_FITS_list = [FITSFileReader(input['basename'], input['source']) for input in input_files] + subtraction_FITS = FITSFileReader(subtraction_file_input[0]['basename'], subtraction_file_input[0]['source']) + self.set_operation_progress(0.5) outputs = [] - for index, input_image in enumerate(input_image_data_list): + for index, input_image in enumerate(input_FITS_list, start=1): # crop the input_image and subtraction_image to the same size - input_image, subtraction_image = crop_arrays([input_image, subtraction_image]) + input_image, subtraction_image = crop_arrays([input_image.sci_data, subtraction_FITS.sci_data]) difference_array = np.subtract(input_image, subtraction_image) - subtraction_comment = f'Product of Datalab Subtraction of {subtraction_file_input[0]["basename"]} subtracted from {input_files[index]["basename"]}' - outputs.append(create_output(self.cache_key, difference_array, index=index, comment=subtraction_comment)) - - self.set_operation_progress(0.90) + subtraction_comment = f'Datalab Subtraction of {subtraction_file_input[0]["basename"]} subtracted from {input_files[index-1]["basename"]}' + outputs.append(FITSOutputHandler(f'{self.cache_key}', difference_array, subtraction_comment).create_save_fits(index=index)) + self.set_operation_progress(0.5 + index/len(input_FITS_list) * 0.4) + log.info(f'Subtraction output: {outputs}') self.set_output(outputs) - log.info(f'Subtraction output: {self.get_output()}') diff --git a/datalab/datalab_session/tests/test_files/median/median_1_2.fits b/datalab/datalab_session/tests/test_files/median/median_1_2.fits index 2a172b712f01e9d53b0d0dededae80648a678180..898ce8fc2d75536a886dd21fd493104a3ae4faf7 100644 GIT binary patch delta 40 wcmZp8!PM}AX~PUgL6^jm#GJ$=1>e+^%)~r}{JhD#7}X{#FbZydz^JeZ06sDf;s5{u delta 53 zcmZp8!PM}AX~PUgrGTRRl+xr9h5R%Hm&B69oWvvr-_(@M#5{%kJcYE(oYdmUD;d=` JKVVeY1OO?c6h;65 diff --git a/datalab/datalab_session/tests/test_files/rgb_stack/rgb_stack.fits 
b/datalab/datalab_session/tests/test_files/rgb_stack/rgb_stack.fits index 3219b2e7ac415c08516d299f4a48461066ac1492..e08546ec001426d437976705ffb771056d2ea9ed 100644 GIT binary patch delta 31 hcmZp;$KG&{eZvgK$%`3nITWCv`7Y!3yNry=ZvnC(3YP!? delta 33 pcmZp;$KG&{eZve!u7INal+xr9h5WS1(;4lWA24ozz{sfZ769i24QBuV diff --git a/datalab/datalab_session/tests/test_operations.py b/datalab/datalab_session/tests/test_operations.py index c096fed..d16dfd7 100644 --- a/datalab/datalab_session/tests/test_operations.py +++ b/datalab/datalab_session/tests/test_operations.py @@ -169,9 +169,9 @@ def tearDown(self): return super().tearDown() @mock.patch('datalab.datalab_session.file_utils.tempfile.NamedTemporaryFile') - @mock.patch('datalab.datalab_session.data_operations.data_operation.get_fits') - @mock.patch('datalab.datalab_session.file_utils.save_fits_and_thumbnails') - @mock.patch('datalab.datalab_session.file_utils.create_jpgs') + @mock.patch('datalab.datalab_session.data_operations.fits_file_reader.get_fits') + @mock.patch('datalab.datalab_session.data_operations.fits_output_handler.save_fits_and_thumbnails') + @mock.patch('datalab.datalab_session.data_operations.fits_output_handler.create_jpgs') def test_operate(self, mock_create_jpgs, mock_save_fits_and_thumbnails, mock_get_fits, mock_named_tempfile): # return the test fits paths in order of the input_files instead of aws fetch @@ -221,10 +221,10 @@ def tearDown(self): self.clean_test_dir() return super().tearDown() - @mock.patch('datalab.datalab_session.file_utils.save_fits_and_thumbnails') - @mock.patch('datalab.datalab_session.file_utils.create_jpgs') + @mock.patch('datalab.datalab_session.data_operations.fits_output_handler.save_fits_and_thumbnails') + @mock.patch('datalab.datalab_session.data_operations.fits_output_handler.create_jpgs') @mock.patch('datalab.datalab_session.file_utils.tempfile.NamedTemporaryFile') - @mock.patch('datalab.datalab_session.data_operations.rgb_stack.get_fits') + 
@mock.patch('datalab.datalab_session.data_operations.fits_file_reader.get_fits') def test_operate(self, mock_get_fits, mock_named_tempfile, mock_create_jpgs, mock_save_fits_and_thumbnails): # return the test fits paths in order of the input_files instead of aws fetch @@ -265,9 +265,9 @@ def tearDown(self): return super().tearDown() @mock.patch('datalab.datalab_session.file_utils.tempfile.NamedTemporaryFile') - @mock.patch('datalab.datalab_session.data_operations.data_operation.get_fits') - @mock.patch('datalab.datalab_session.file_utils.save_fits_and_thumbnails') - @mock.patch('datalab.datalab_session.file_utils.create_jpgs') + @mock.patch('datalab.datalab_session.data_operations.fits_file_reader.get_fits') + @mock.patch('datalab.datalab_session.data_operations.fits_output_handler.save_fits_and_thumbnails') + @mock.patch('datalab.datalab_session.data_operations.fits_output_handler.create_jpgs') def test_operate(self, mock_create_jpgs, mock_save_fits_and_thumbnails, mock_get_fits, mock_named_tempfile): # Create a negative images using numpy From 9e3777d119e60bdeaf5d62738d4b87ce417156ec Mon Sep 17 00:00:00 2001 From: Lloyd Dakin Date: Wed, 9 Oct 2024 15:04:17 -0700 Subject: [PATCH 2/4] input download progress tracking --- datalab/datalab_session/data_operations/median.py | 5 ++++- datalab/datalab_session/data_operations/normalization.py | 5 ++++- datalab/datalab_session/data_operations/rgb_stack.py | 6 ++++-- datalab/datalab_session/data_operations/stacking.py | 6 ++++-- datalab/datalab_session/data_operations/subtraction.py | 7 +++++-- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/datalab/datalab_session/data_operations/median.py b/datalab/datalab_session/data_operations/median.py index 508ed5b..d36cc1d 100644 --- a/datalab/datalab_session/data_operations/median.py +++ b/datalab/datalab_session/data_operations/median.py @@ -48,7 +48,10 @@ def operate(self): comment = f'Datalab Median on {", ".join([image["basename"] for image in input_list])}' 
log.info(comment) - input_FITS_list = [FITSFileReader(input['basename'], input['source']) for input in input_list] + input_FITS_list = [] + for index, input in enumerate(input_list, start=1): + input_FITS_list.append(FITSFileReader(input['basename'], input['source'])) + self.set_operation_progress(0.5 * (index / len(input_list))) # Creating the Median array cropped_data = crop_arrays([image.sci_data for image in input_FITS_list]) diff --git a/datalab/datalab_session/data_operations/normalization.py b/datalab/datalab_session/data_operations/normalization.py index 2c07316..073ff45 100644 --- a/datalab/datalab_session/data_operations/normalization.py +++ b/datalab/datalab_session/data_operations/normalization.py @@ -44,7 +44,10 @@ def operate(self): input_list = self.input_data.get('input_files', []) log.info(f'Normalization operation on {len(input_list)} file(s)') - input_FITS_list = [FITSFileReader(input['basename'], input['source']) for input in input_list] + input_FITS_list = [] + for index, input in enumerate(input_list, start=1): + input_FITS_list.append(FITSFileReader(input['basename'], input['source'])) + self.set_operation_progress(0.5 * (index / len(input_list))) output_files = [] for index, image in enumerate(input_FITS_list, start=1): diff --git a/datalab/datalab_session/data_operations/rgb_stack.py b/datalab/datalab_session/data_operations/rgb_stack.py index ea4a521..c5639c9 100644 --- a/datalab/datalab_session/data_operations/rgb_stack.py +++ b/datalab/datalab_session/data_operations/rgb_stack.py @@ -63,8 +63,10 @@ def operate(self): rgb_comment = f'Datalab RGB Stack on files {", ".join([image["basename"] for image in rgb_input_list])}' log.info(rgb_comment) - input_FITS_list = [FITSFileReader(input['basename'], input['source']) for input in rgb_input_list] - self.set_operation_progress(0.4) + input_FITS_list = [] + for index, input in enumerate(rgb_input_list, start=1): + input_FITS_list.append(FITSFileReader(input['basename'], input['source'])) + 
self.set_operation_progress(0.4 * (index / len(rgb_input_list))) fits_file_list = [image.fits_file for image in input_FITS_list] large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, fits_file_list, color=True) diff --git a/datalab/datalab_session/data_operations/stacking.py b/datalab/datalab_session/data_operations/stacking.py index 31d87e7..d4f6e10 100644 --- a/datalab/datalab_session/data_operations/stacking.py +++ b/datalab/datalab_session/data_operations/stacking.py @@ -49,8 +49,10 @@ def operate(self): comment= f'Datalab Stacking on {", ".join([image["basename"] for image in input_files])}' log.info(comment) - input_FITS_list = [FITSFileReader(input['basename'], input['source']) for input in input_files] - self.set_operation_progress(0.4) + input_FITS_list = [] + for index, input in enumerate(input_files, start=1): + input_FITS_list.append(FITSFileReader(input['basename'], input['source'])) + self.set_operation_progress(0.5 * (index / len(input_files))) cropped_data = crop_arrays([image.sci_data for image in input_FITS_list]) stacked_ndarray = np.stack(cropped_data, axis=2) diff --git a/datalab/datalab_session/data_operations/subtraction.py b/datalab/datalab_session/data_operations/subtraction.py index 6534e91..4b99159 100644 --- a/datalab/datalab_session/data_operations/subtraction.py +++ b/datalab/datalab_session/data_operations/subtraction.py @@ -59,9 +59,12 @@ def operate(self): log.info(f'Subtraction operation on {len(input_files)} files') - input_FITS_list = [FITSFileReader(input['basename'], input['source']) for input in input_files] subtraction_FITS = FITSFileReader(subtraction_file_input[0]['basename'], subtraction_file_input[0]['source']) - self.set_operation_progress(0.5) + input_FITS_list = [FITSFileReader(input['basename'], input['source']) for input in input_files] + input_FITS_list = [] + for index, input in enumerate(input_files, start=1): + input_FITS_list.append(FITSFileReader(input['basename'], input['source'])) + 
self.set_operation_progress(0.5 * (index / len(input_files))) outputs = [] for index, input_image in enumerate(input_FITS_list, start=1): From bd4ff4333189b5c9cf1cd62fdbddd8595fb8d35a Mon Sep 17 00:00:00 2001 From: Lloyd Dakin Date: Fri, 11 Oct 2024 09:59:27 -0700 Subject: [PATCH 3/4] Comments on handler classes, naming changes, removing open hduls --- .../data_operations/fits_file_reader.py | 25 --------- .../data_operations/fits_output_handler.py | 51 ++++++++++++++----- .../data_operations/input_data_handler.py | 44 ++++++++++++++++ .../datalab_session/data_operations/median.py | 10 ++-- .../data_operations/normalization.py | 12 ++--- .../data_operations/rgb_stack.py | 12 ++--- .../data_operations/stacking.py | 10 ++-- .../data_operations/subtraction.py | 17 +++---- .../datalab_session/tests/test_operations.py | 6 +-- 9 files changed, 116 insertions(+), 71 deletions(-) delete mode 100644 datalab/datalab_session/data_operations/fits_file_reader.py create mode 100644 datalab/datalab_session/data_operations/input_data_handler.py diff --git a/datalab/datalab_session/data_operations/fits_file_reader.py b/datalab/datalab_session/data_operations/fits_file_reader.py deleted file mode 100644 index 0ec6f2b..0000000 --- a/datalab/datalab_session/data_operations/fits_file_reader.py +++ /dev/null @@ -1,25 +0,0 @@ -from astropy.io import fits - -from datalab.datalab_session.s3_utils import get_fits -from datalab.datalab_session.file_utils import get_hdu - -class FITSFileReader: - - basename = None - fits_file = None - hdu_list = None - - def __init__(self, basename: str, source: str = None) -> None: - self.basename = basename - self.fits_file = get_fits(basename, source) - self.hdu_list = fits.open(self.fits_file) - - def __str__(self) -> str: - return f"{self.basename}@{self.fits_file}\nHDU List\n{self.hdu_list.info()}" - - @property - def sci_data(self): - return self.hdu_list['SCI'].data - - def hdu(self, extension: str): - return get_hdu(self.fits_file, extension) diff 
class FITSOutputHandler():
    """Build a Datalab output FITS file from an array and hand it off for storage.

    The handler is constructed with a cache key and the result array of an
    operation. It wraps the array in a 'SCI' image HDU next to a primary HDU
    whose header records the cache key, and can then write the FITS file,
    make sure a large and a small jpg exist, and pass everything to
    save_fits_and_thumbnails.

    Attributes:
        dlab_id (str): The cache key for the FITS file.
        primary_hdu (fits.PrimaryHDU): The primary HDU; its header carries the 'KEY' card.
        image_hdu (fits.ImageHDU): The 'SCI' image HDU holding the output array.
        data: Read-only view of image_hdu.data.
    """

    def __init__(self, cache_key: str, data: np.array, comment: str=None) -> None:
        """Create the primary and 'SCI' HDUs for the output.

        Args:
            cache_key (str): The cache key for the FITS file; stored in the
                primary header under 'KEY' and used as the output ID.
            data (np.array): The array that becomes the 'SCI' image HDU.
            comment (str): Optional comment added to the primary header.
        """
        self.dlab_id = cache_key
        self.primary_hdu = fits.PrimaryHDU(header=fits.Header([('KEY', cache_key)]))
        self.image_hdu = fits.ImageHDU(data=data, name='SCI')
        if comment:
            self.set_comment(comment)

    @property
    def data(self):
        """Return the array currently held by the 'SCI' image HDU."""
        # __str__ and the class docstring both refer to `data`, but __init__
        # never stored it; exposing it here keeps a single source of truth.
        return self.image_hdu.data

    def __str__(self) -> str:
        return f"Key: {self.dlab_id}\nData:\n{self.data}"

    def set_comment(self, comment: str):
        """Add a comment card to the primary header of the FITS file."""
        self.primary_hdu.header.add_comment(comment)

    def create_and_save_data_products(self, index: int=None, large_jpg_path: str=None, small_jpg_path: str=None):
        """Write the FITS file, ensure both jpgs exist, and save all three.

        Call this once the operation is finished and its result should be stored.

        Args:
            index (int): Optional index forwarded to save_fits_and_thumbnails,
                used to tell multiple outputs of one operation apart.
            large_jpg_path (str): Optional path to an existing large jpg.
            small_jpg_path (str): Optional path to an existing small jpg.
                New jpgs are generated unless BOTH paths are supplied.

        Returns:
            The value returned by save_fits_and_thumbnails (described by the
            original author as the frontend-readable Datalab output dictionary).
        """
        hdu_list = fits.HDUList([self.primary_hdu, self.image_hdu])

        # Only the generated *name* is used; writeto() then creates the file at that path.
        # NOTE(review): nothing in this class removes that file afterwards - confirm
        # whether save_fits_and_thumbnails cleans it up. The call shape is kept as-is
        # because the test suite patches tempfile.NamedTemporaryFile(...).name.
        fits_output_path = tempfile.NamedTemporaryFile(suffix=f'{self.dlab_id}.fits').name
        hdu_list.writeto(fits_output_path, overwrite=True)

        # allow for operations to pregenerate the jpgs, ex. RGB stacking
        if not large_jpg_path or not small_jpg_path:
            large_jpg_path, small_jpg_path = create_jpgs(self.dlab_id, fits_output_path)

        return save_fits_and_thumbnails(self.dlab_id, fits_output_path, large_jpg_path, small_jpg_path, index)
RGB stacking - if not large_jpg or not small_jpg: - large_jpg, small_jpg = create_jpgs(self.key, fits_output_path) + if not large_jpg_path or not small_jpg_path: + large_jpg_path, small_jpg_path = create_jpgs(self.dlab_id, fits_output_path) - return save_fits_and_thumbnails(self.key, fits_output_path, large_jpg, small_jpg, index) + return save_fits_and_thumbnails(self.dlab_id, fits_output_path, large_jpg_path, small_jpg_path, index) diff --git a/datalab/datalab_session/data_operations/input_data_handler.py b/datalab/datalab_session/data_operations/input_data_handler.py new file mode 100644 index 0000000..4b03c2f --- /dev/null +++ b/datalab/datalab_session/data_operations/input_data_handler.py @@ -0,0 +1,44 @@ +from astropy.io import fits + +from datalab.datalab_session.s3_utils import get_fits +from datalab.datalab_session.file_utils import get_hdu + +class InputDataHandler(): + """A class to read FITS files and provide access to the data. + + The class inits with a basename and source, and reads the FITS file + this data is then stored in the class attributes for easy access. + + Attributes: + basename (str): The basename of the FITS file. + fits_file (str): The path to the FITS file. + sci_data (np.array): The data from the 'SCI' extension of the FITS file. + """ + + def __init__(self, basename: str, source: str = None) -> None: + """Inits InputDataHandler with basename and source. + + Uses the basename to query the archive for the matching FITS file. + Also can take a source argument to specify a different source for the FITS file. + At the time of writing two common sources are 'datalab' and 'archive'. + New sources will need to be added in the get_fits function in s3_utils.py. + + Args: + basename (str): The basename of the FITS file. + source (str): Optionally add a source to the FITS file in case it's not the LCO archive. 
+ """ + self.basename = basename + self.fits_file = get_fits(basename, source) + self.sci_data = get_hdu(self.fits_file, 'SCI').data + + def __str__(self) -> str: + with fits.open(self.fits_file) as hdul: + return f"{self.basename}@{self.fits_file}\nHDU List\n{self.hdul.info()}" + + def get_hdu(self, extension: str=None): + """Return an HDU from the FITS file. + + Args: + extension (str): The extension to return from the FITS file. Default is 'SCI'. + """ + return get_hdu(self.fits_file, extension) diff --git a/datalab/datalab_session/data_operations/median.py b/datalab/datalab_session/data_operations/median.py index d36cc1d..c0351ae 100644 --- a/datalab/datalab_session/data_operations/median.py +++ b/datalab/datalab_session/data_operations/median.py @@ -2,7 +2,7 @@ import numpy as np -from datalab.datalab_session.data_operations.fits_file_reader import FITSFileReader +from datalab.datalab_session.data_operations.input_data_handler import InputDataHandler from datalab.datalab_session.data_operations.fits_output_handler import FITSOutputHandler from datalab.datalab_session.data_operations.data_operation import BaseDataOperation from datalab.datalab_session.exceptions import ClientAlertException @@ -48,18 +48,18 @@ def operate(self): comment = f'Datalab Median on {", ".join([image["basename"] for image in input_list])}' log.info(comment) - input_FITS_list = [] + input_fits_list = [] for index, input in enumerate(input_list, start=1): - input_FITS_list.append(FITSFileReader(input['basename'], input['source'])) + input_fits_list.append(InputDataHandler(input['basename'], input['source'])) self.set_operation_progress(0.5 * (index / len(input_list))) # Creating the Median array - cropped_data = crop_arrays([image.sci_data for image in input_FITS_list]) + cropped_data = crop_arrays([image.sci_data for image in input_fits_list]) stacked_ndarray = np.stack(cropped_data, axis=2) median = np.median(stacked_ndarray, axis=2) self.set_operation_progress(0.80) - output = 
FITSOutputHandler(self.cache_key, median, comment).create_save_fits() + output = FITSOutputHandler(self.cache_key, median, comment).create_and_save_data_products() log.info(f'Median output: {output}') self.set_output(output) diff --git a/datalab/datalab_session/data_operations/normalization.py b/datalab/datalab_session/data_operations/normalization.py index 073ff45..e6a003f 100644 --- a/datalab/datalab_session/data_operations/normalization.py +++ b/datalab/datalab_session/data_operations/normalization.py @@ -2,7 +2,7 @@ import numpy as np -from datalab.datalab_session.data_operations.fits_file_reader import FITSFileReader +from datalab.datalab_session.data_operations.input_data_handler import InputDataHandler from datalab.datalab_session.data_operations.data_operation import BaseDataOperation from datalab.datalab_session.data_operations.fits_output_handler import FITSOutputHandler @@ -44,20 +44,20 @@ def operate(self): input_list = self.input_data.get('input_files', []) log.info(f'Normalization operation on {len(input_list)} file(s)') - input_FITS_list = [] + input_fits_list = [] for index, input in enumerate(input_list, start=1): - input_FITS_list.append(FITSFileReader(input['basename'], input['source'])) + input_fits_list.append(InputDataHandler(input['basename'], input['source'])) self.set_operation_progress(0.5 * (index / len(input_list))) output_files = [] - for index, image in enumerate(input_FITS_list, start=1): + for index, image in enumerate(input_fits_list, start=1): median = np.median(image.sci_data) normalized_image = image.sci_data / median comment = f'Datalab Normalization on file {input_list[index-1]["basename"]}' - output = FITSOutputHandler(f'{self.cache_key}', normalized_image, comment).create_save_fits(index=index) + output = FITSOutputHandler(f'{self.cache_key}', normalized_image, comment).create_and_save_data_products(index=index) output_files.append(output) - self.set_operation_progress(0.5 + index/len(input_FITS_list) * 0.4) + 
self.set_operation_progress(0.5 + index/len(input_fits_list) * 0.4) log.info(f'Normalization output: {output_files}') self.set_output(output_files) diff --git a/datalab/datalab_session/data_operations/rgb_stack.py b/datalab/datalab_session/data_operations/rgb_stack.py index c5639c9..43d09c7 100644 --- a/datalab/datalab_session/data_operations/rgb_stack.py +++ b/datalab/datalab_session/data_operations/rgb_stack.py @@ -3,7 +3,7 @@ from astropy.io import fits import numpy as np -from datalab.datalab_session.data_operations.fits_file_reader import FITSFileReader +from datalab.datalab_session.data_operations.input_data_handler import InputDataHandler from datalab.datalab_session.data_operations.data_operation import BaseDataOperation from datalab.datalab_session.data_operations.fits_output_handler import FITSOutputHandler from datalab.datalab_session.exceptions import ClientAlertException @@ -63,22 +63,22 @@ def operate(self): rgb_comment = f'Datalab RGB Stack on files {", ".join([image["basename"] for image in rgb_input_list])}' log.info(rgb_comment) - input_FITS_list = [] + input_fits_list = [] for index, input in enumerate(rgb_input_list, start=1): - input_FITS_list.append(FITSFileReader(input['basename'], input['source'])) + input_fits_list.append(InputDataHandler(input['basename'], input['source'])) self.set_operation_progress(0.4 * (index / len(rgb_input_list))) - fits_file_list = [image.fits_file for image in input_FITS_list] + fits_file_list = [image.fits_file for image in input_fits_list] large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, fits_file_list, color=True) self.set_operation_progress(0.6) # color photos take three files, so we store it as one fits file with a 3d SCI ndarray - sci_data_list = [image.sci_data for image in input_FITS_list] + sci_data_list = [image.sci_data for image in input_fits_list] cropped_data_list = crop_arrays(sci_data_list) stacked_ndarray = np.stack(cropped_data_list, axis=2) self.set_operation_progress(0.8) - output = 
FITSOutputHandler(self.cache_key, stacked_ndarray, rgb_comment).create_save_fits(large_jpg=large_jpg_path, small_jpg=small_jpg_path) + output = FITSOutputHandler(self.cache_key, stacked_ndarray, rgb_comment).create_and_save_data_products(large_jpg_path=large_jpg_path, small_jpg_path=small_jpg_path) log.info(f'RGB Stack output: {output}') self.set_output(output) diff --git a/datalab/datalab_session/data_operations/stacking.py b/datalab/datalab_session/data_operations/stacking.py index d4f6e10..655e0f3 100644 --- a/datalab/datalab_session/data_operations/stacking.py +++ b/datalab/datalab_session/data_operations/stacking.py @@ -2,7 +2,7 @@ import numpy as np -from datalab.datalab_session.data_operations.fits_file_reader import FITSFileReader +from datalab.datalab_session.data_operations.input_data_handler import InputDataHandler from datalab.datalab_session.data_operations.fits_output_handler import FITSOutputHandler from datalab.datalab_session.data_operations.data_operation import BaseDataOperation from datalab.datalab_session.exceptions import ClientAlertException @@ -49,12 +49,12 @@ def operate(self): comment= f'Datalab Stacking on {", ".join([image["basename"] for image in input_files])}' log.info(comment) - input_FITS_list = [] + input_fits_list = [] for index, input in enumerate(input_files, start=1): - input_FITS_list.append(FITSFileReader(input['basename'], input['source'])) + input_fits_list.append(InputDataHandler(input['basename'], input['source'])) self.set_operation_progress(0.5 * (index / len(input_files))) - cropped_data = crop_arrays([image.sci_data for image in input_FITS_list]) + cropped_data = crop_arrays([image.sci_data for image in input_fits_list]) stacked_ndarray = np.stack(cropped_data, axis=2) self.set_operation_progress(0.6) @@ -62,7 +62,7 @@ def operate(self): stacked_sum = np.sum(stacked_ndarray, axis=2) self.set_operation_progress(0.8) - output = FITSOutputHandler(self.cache_key, stacked_sum, comment).create_save_fits() + output = 
FITSOutputHandler(self.cache_key, stacked_sum, comment).create_and_save_data_products() log.info(f'Stacked output: {output}') self.set_output(output) diff --git a/datalab/datalab_session/data_operations/subtraction.py b/datalab/datalab_session/data_operations/subtraction.py index 4b99159..ccfcea6 100644 --- a/datalab/datalab_session/data_operations/subtraction.py +++ b/datalab/datalab_session/data_operations/subtraction.py @@ -2,7 +2,7 @@ import numpy as np -from datalab.datalab_session.data_operations.fits_file_reader import FITSFileReader +from datalab.datalab_session.data_operations.input_data_handler import InputDataHandler from datalab.datalab_session.data_operations.data_operation import BaseDataOperation from datalab.datalab_session.data_operations.fits_output_handler import FITSOutputHandler from datalab.datalab_session.exceptions import ClientAlertException @@ -59,23 +59,22 @@ def operate(self): log.info(f'Subtraction operation on {len(input_files)} files') - subtraction_FITS = FITSFileReader(subtraction_file_input[0]['basename'], subtraction_file_input[0]['source']) - input_FITS_list = [FITSFileReader(input['basename'], input['source']) for input in input_files] - input_FITS_list = [] + subtraction_fits = InputDataHandler(subtraction_file_input[0]['basename'], subtraction_file_input[0]['source']) + input_fits_list = [] for index, input in enumerate(input_files, start=1): - input_FITS_list.append(FITSFileReader(input['basename'], input['source'])) + input_fits_list.append(InputDataHandler(input['basename'], input['source'])) self.set_operation_progress(0.5 * (index / len(input_files))) outputs = [] - for index, input_image in enumerate(input_FITS_list, start=1): + for index, input_image in enumerate(input_fits_list, start=1): # crop the input_image and subtraction_image to the same size - input_image, subtraction_image = crop_arrays([input_image.sci_data, subtraction_FITS.sci_data]) + input_image, subtraction_image = crop_arrays([input_image.sci_data, 
subtraction_fits.sci_data]) difference_array = np.subtract(input_image, subtraction_image) subtraction_comment = f'Datalab Subtraction of {subtraction_file_input[0]["basename"]} subtracted from {input_files[index-1]["basename"]}' - outputs.append(FITSOutputHandler(f'{self.cache_key}', difference_array, subtraction_comment).create_save_fits(index=index)) - self.set_operation_progress(0.5 + index/len(input_FITS_list) * 0.4) + outputs.append(FITSOutputHandler(f'{self.cache_key}', difference_array, subtraction_comment).create_and_save_data_products(index=index)) + self.set_operation_progress(0.5 + index/len(input_fits_list) * 0.4) log.info(f'Subtraction output: {outputs}') self.set_output(outputs) diff --git a/datalab/datalab_session/tests/test_operations.py b/datalab/datalab_session/tests/test_operations.py index d16dfd7..0e769ee 100644 --- a/datalab/datalab_session/tests/test_operations.py +++ b/datalab/datalab_session/tests/test_operations.py @@ -169,7 +169,7 @@ def tearDown(self): return super().tearDown() @mock.patch('datalab.datalab_session.file_utils.tempfile.NamedTemporaryFile') - @mock.patch('datalab.datalab_session.data_operations.fits_file_reader.get_fits') + @mock.patch('datalab.datalab_session.data_operations.input_data_handler.get_fits') @mock.patch('datalab.datalab_session.data_operations.fits_output_handler.save_fits_and_thumbnails') @mock.patch('datalab.datalab_session.data_operations.fits_output_handler.create_jpgs') def test_operate(self, mock_create_jpgs, mock_save_fits_and_thumbnails, mock_get_fits, mock_named_tempfile): @@ -224,7 +224,7 @@ def tearDown(self): @mock.patch('datalab.datalab_session.data_operations.fits_output_handler.save_fits_and_thumbnails') @mock.patch('datalab.datalab_session.data_operations.fits_output_handler.create_jpgs') @mock.patch('datalab.datalab_session.file_utils.tempfile.NamedTemporaryFile') - @mock.patch('datalab.datalab_session.data_operations.fits_file_reader.get_fits') + 
@mock.patch('datalab.datalab_session.data_operations.input_data_handler.get_fits') def test_operate(self, mock_get_fits, mock_named_tempfile, mock_create_jpgs, mock_save_fits_and_thumbnails): # return the test fits paths in order of the input_files instead of aws fetch @@ -265,7 +265,7 @@ def tearDown(self): return super().tearDown() @mock.patch('datalab.datalab_session.file_utils.tempfile.NamedTemporaryFile') - @mock.patch('datalab.datalab_session.data_operations.fits_file_reader.get_fits') + @mock.patch('datalab.datalab_session.data_operations.input_data_handler.get_fits') @mock.patch('datalab.datalab_session.data_operations.fits_output_handler.save_fits_and_thumbnails') @mock.patch('datalab.datalab_session.data_operations.fits_output_handler.create_jpgs') def test_operate(self, mock_create_jpgs, mock_save_fits_and_thumbnails, mock_get_fits, mock_named_tempfile): From 7d0416adbbe0212caa0e12e765b76bc643cb6a5c Mon Sep 17 00:00:00 2001 From: Lloyd Dakin Date: Fri, 11 Oct 2024 16:48:22 -0700 Subject: [PATCH 4/4] changed dlab to datalab --- .../data_operations/fits_output_handler.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datalab/datalab_session/data_operations/fits_output_handler.py b/datalab/datalab_session/data_operations/fits_output_handler.py index c0a45ba..f0831c1 100644 --- a/datalab/datalab_session/data_operations/fits_output_handler.py +++ b/datalab/datalab_session/data_operations/fits_output_handler.py @@ -14,7 +14,7 @@ class FITSOutputHandler(): The FITS file is then saved to the cache and the large and small jpgs are created. Attributes: - dlab_id (str): The cache key for the FITS file. + datalab_id (str): The cache key for the FITS file. primary_hdu (fits.PrimaryHDU): The primary HDU for the FITS file. image_hdu (fits.ImageHDU): The image HDU for the FITS file. data (np.array): The data for the image HDU. 
@@ -28,13 +28,13 @@ def __init__(self, cache_key: str, data: np.array, comment: str=None) -> None: data (np.array): The data that will create the image HDU. comment (str): Optionally add a comment to add to the FITS file. """ - self.dlab_id = cache_key + self.datalab_id = cache_key self.primary_hdu = fits.PrimaryHDU(header=fits.Header([('KEY', cache_key)])) self.image_hdu = fits.ImageHDU(data=data, name='SCI') if comment: self.set_comment(comment) def __str__(self) -> str: - return f"Key: {self.dlab_id}\nData:\n{self.data}" + return f"Key: {self.datalab_id}\nData:\n{self.data}" def set_comment(self, comment: str): """Add a comment to the FITS file.""" @@ -52,11 +52,11 @@ def create_and_save_data_products(self, index: int=None, large_jpg_path: str=Non small_jpg (str): Optionally add a path to a small jpg to save, will not create a new jpg. """ hdu_list = fits.HDUList([self.primary_hdu, self.image_hdu]) - fits_output_path = tempfile.NamedTemporaryFile(suffix=f'{self.dlab_id}.fits').name + fits_output_path = tempfile.NamedTemporaryFile(suffix=f'{self.datalab_id}.fits').name hdu_list.writeto(fits_output_path, overwrite=True) # allow for operations to pregenerate the jpgs, ex. RGB stacking if not large_jpg_path or not small_jpg_path: - large_jpg_path, small_jpg_path = create_jpgs(self.dlab_id, fits_output_path) + large_jpg_path, small_jpg_path = create_jpgs(self.datalab_id, fits_output_path) - return save_fits_and_thumbnails(self.dlab_id, fits_output_path, large_jpg_path, small_jpg_path, index) + return save_fits_and_thumbnails(self.datalab_id, fits_output_path, large_jpg_path, small_jpg_path, index)