From 2d6bcaed4256f11d7196acc2a79f6a03436c5f13 Mon Sep 17 00:00:00 2001 From: Lloyd Dakin Date: Mon, 20 May 2024 15:54:23 -0700 Subject: [PATCH] Raise FileNotFoundError with descriptive messages; remove tempfile and np.memmap around fits.open since it does that by default already; there was also a memmap error in prod --- .../data_operations/data_operation.py | 47 ++++++++----------- datalab/datalab_session/util.py | 3 +- 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/datalab/datalab_session/data_operations/data_operation.py b/datalab/datalab_session/data_operations/data_operation.py index cc8833e..9e8b805 100644 --- a/datalab/datalab_session/data_operations/data_operation.py +++ b/datalab/datalab_session/data_operations/data_operation.py @@ -136,30 +136,23 @@ def create_and_store_fits(self, hdu_list: fits.HDUList, percent=None, cur_percen def get_fits_npdata(self, input_files: list[dict], percent=None, cur_percent=None) -> list[np.memmap]: total_files = len(input_files) - memmap_paths = [] - - # get the fits urls, download their file, extract the image data, and store in a list - with tempfile.TemporaryDirectory() as temp_dir: - for index, file_info in enumerate(input_files, start=1): - basename = file_info.get('basename', 'No basename found') - archive_record = get_archive_from_basename(basename) - - try: - fits_url = archive_record[0].get('url', 'No URL found') - except IndexError: - continue - - with fits.open(fits_url) as hdu_list: - data = hdu_list['SCI'].data - memmap_path = os.path.join(temp_dir, f'memmap_{index}.dat') - memmap_array = np.memmap(memmap_path, dtype=data.dtype, mode='w+', shape=data.shape) - memmap_array[:] = data[:] - memmap_paths.append(memmap_path) - - if percent is not None and cur_percent is not None: - self.set_percent_completion(cur_percent + index/total_files * percent) - - return [ - np.memmap(path, dtype=np.float32, mode='r', shape=memmap_array.shape) - for path in memmap_paths - ] + image_data_list = [] + + # get the fits urls and extract the image data + 
for index, file_info in enumerate(input_files, start=1): + basename = file_info.get('basename', 'No basename found') + archive_record = get_archive_from_basename(basename) + + try: + fits_url = archive_record[0].get('url', 'No URL found') + except Exception as e: + raise FileNotFoundError(f"No image found with specified basename: {basename} Error: {e}") + + with fits.open(fits_url) as hdu_list: + data = hdu_list['SCI'].data + image_data_list.append(data) + + if percent is not None and cur_percent is not None: + self.set_percent_completion(cur_percent + index/total_files * percent) + + return image_data_list diff --git a/datalab/datalab_session/util.py b/datalab/datalab_session/util.py index 724fb10..4dc943f 100644 --- a/datalab/datalab_session/util.py +++ b/datalab/datalab_session/util.py @@ -97,8 +97,7 @@ def get_archive_from_basename(basename: str) -> dict: image_data = response.json() results = image_data.get('results', None) except Exception as e: - log.error(f"failed to fetch {basename} from archive, Error: {e}") - raise FileNotFoundError + raise FileNotFoundError(f"Error fetching image data from archive: {e}") return results