From 7164d81e2ad0525b77a448ea734f55f550039141 Mon Sep 17 00:00:00 2001 From: Curtis McCully Date: Thu, 20 Aug 2020 15:19:22 -0400 Subject: [PATCH 1/4] Fixes to get old data cube data working in the new version of BANZAI. --- banzai/data.py | 9 ++++++--- banzai/lco.py | 28 ++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/banzai/data.py b/banzai/data.py index 295b4c995..90b4786da 100644 --- a/banzai/data.py +++ b/banzai/data.py @@ -27,8 +27,8 @@ def __init__(self, data: Union[np.array, Table], meta: Union[dict, fits.Header], def _validate_mask(self, mask): if mask is not None: - if mask.shape != self.data.shape: - raise ValueError('Mask must have the same dimensions as the data') + if len(mask.shape) != len(self.data.shape): + raise ValueError('Mask has different number of dimensions from the data.') def _init_array(self, array: np.array = None, dtype: Type = None): if not self.memmap: @@ -49,7 +49,10 @@ def _init_array(self, array: np.array = None, dtype: Type = None): def add_mask(self, mask: np.array): self._validate_mask(mask) - self.mask = self._init_array(mask) + self.mask = self._init_array(dtype=np.uint8) + overlap_region = tuple(slice(None, min(mask_shape, data_shape), 1) + for mask_shape, data_shape in zip(mask.shape, self.data.shape)) + self.mask[overlap_region] = mask[overlap_region] def __del__(self): for handle in self._file_handles: diff --git a/banzai/lco.py b/banzai/lco.py index 59958277c..0769d4134 100644 --- a/banzai/lco.py +++ b/banzai/lco.py @@ -367,6 +367,9 @@ def open(self, file_info, runtime_context) -> Optional[ObservationFrame]: for hdu in fits_hdu_list: if hdu.data is None: hdu_list.append(HeaderOnly(meta=hdu.header)) + elif len(hdu.data.shape) > 2: + for munged_hdu in self._munge_data_cube(hdu): + hdu_list.append(munged_hdu) else: hdu_list.append(self.data_class(data=hdu.data, meta=hdu.header, name=hdu.header.get('EXTNAME'))) else: @@ -401,6 +404,7 @@ def open(self, file_info, runtime_context) -> Optional[ObservationFrame]: associated_data[associated_extension['NAME']] = None if len(hdu.data.shape) > 2: hdu_list += self._munge_data_cube(hdu) + break # update datasec/trimsec for fs01 if hdu.header.get('INSTRUME') == 'fs01': self._update_fs01_sections(hdu) @@ -553,8 +557,10 @@ def _munge_data_cube(hdu): :return: List CCDData objects """ # The first extension gets to be a header only object + primary_hdu_gain = hdu.header['GAIN'] + gain_comment = hdu.header.comments['GAIN'] + hdu.header.remove('GAIN') hdu_list = [HeaderOnly(meta=hdu.header)] - # We need to properly set the datasec and detsec keywords in case we didn't read out the # middle row (the "Missing Row Problem"). sinistro_datasecs = {'missing': ['[1:2048,1:2048]', '[1:2048,1:2048]', @@ -566,18 +572,24 @@ def _munge_data_cube(hdu): 'full': ['[1:2048,1:2048]', '[4096:2049,1:2048]', '[4096:2049,4096:2049]', '[1:2048,4096:2049]']} for i in range(hdu.data.shape[0]): - gain = eval(hdu.header['GAIN'])[i] + if isinstance(primary_hdu_gain, str): + gain = eval(primary_hdu_gain)[i] + else: + gain = primary_hdu_gain if hdu.data.shape[1] > 2048: mode = 'full' else: mode = 'missing' datasec = sinistro_datasecs[mode][i] detsec = sinistro_detsecs[mode][i] - header = {'BIASSEC': ('[2055:2080,1:2048]', '[binned pixel] Overscan Region'), - 'GAIN': (gain, hdu.header.comments['GAIN']), - 'DATASEC': (datasec, '[binned pixel] Data section'), - 'DETSEC': (detsec, '[unbinned pixel] Detector section'), - 'CCDSUM': (hdu.header['CCDSUM'], hdu.header.comments['CCDSUM'])} + header = fits.Header() + if hdu.data is not None and hdu.data.dtype == np.uint16: + hdu.data = hdu.data.astype(np.float64) + for keyword, value in {'BIASSEC': ('[2055:2080,1:2048]', '[binned pixel] Overscan Region'), + 'GAIN': (gain, gain_comment), + 'DATASEC': (datasec, '[binned pixel] Data section'), + 'DETSEC': (detsec, '[unbinned pixel] Detector section'), + 'CCDSUM': (hdu.header['CCDSUM'], hdu.header.comments['CCDSUM'])}.items(): + header[keyword] = value hdu_list.append(CCDData(data=hdu.data[i], meta=fits.Header(header))) - # We have to split the gain keyword for each extension return hdu_list From ddb7fd9b042fa6a0f759d2de02b9126bf4425e0a Mon Sep 17 00:00:00 2001 From: Curtis McCully Date: Thu, 20 Aug 2020 17:16:37 -0400 Subject: [PATCH 2/4] Fixes to default behavior in data classes to make unit tests work. --- banzai/data.py | 8 ++++---- banzai/tests/test_bpm.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/banzai/data.py b/banzai/data.py index 90b4786da..bc8143d67 100644 --- a/banzai/data.py +++ b/banzai/data.py @@ -31,15 +31,15 @@ def _validate_mask(self, mask): raise ValueError('Mask has different number of dimensions from the data.') def _init_array(self, array: np.array = None, dtype: Type = None): - if not self.memmap: + if not self.memmap and array is not None: return array - file_handle = tempfile.NamedTemporaryFile('w+b') if array is None: - shape = self.data.shape + shape = np.array(self.data).shape if dtype is None: dtype = self.data.dtype array = np.zeros(shape, dtype=dtype) - if array.size > 0: + if array.size > 0 and self.memmap: + file_handle = tempfile.NamedTemporaryFile('w+b') memory_mapped_array = np.memmap(file_handle, shape=array.shape, dtype=array.dtype, mode='readwrite') memory_mapped_array.ravel()[:] = array.ravel()[:] self._file_handles.append(file_handle) diff --git a/banzai/tests/test_bpm.py b/banzai/tests/test_bpm.py index 011d782f9..80b5ad5da 100644 --- a/banzai/tests/test_bpm.py +++ b/banzai/tests/test_bpm.py @@ -37,7 +37,7 @@ def test_null_input_imags(): @mock.patch('banzai.calibrations.CalibrationUser.get_calibration_file_info', return_value={'filename': 'test.fits'}) def test_adds_good_bpm(mock_bpm_name, mock_bpm, set_random_seed): image = FakeLCOObservationFrame(hdu_list=[FakeCCDData(memmap=False)]) - master_image = FakeLCOObservationFrame(hdu_list=[FakeCCDData(data=make_test_bpm(101,103), memmap=False)], + master_image = FakeLCOObservationFrame(hdu_list=[FakeCCDData(data=make_test_bpm(101, 103), memmap=False)], file_path='test.fits') mock_bpm.return_value = master_image tester = BadPixelMaskLoader(FakeContext()) @@ -84,7 +84,7 @@ def test_uses_fallback_if_bpm_missing_and_no_bpm_set(mock_get_bpm_filename): def test_removes_image_if_wrong_shape(mock_get_bpm_filename, mock_bpm, set_random_seed): image = FakeLCOObservationFrame(hdu_list=[FakeCCDData(memmap=False)]) mock_bpm.return_value = FakeLCOObservationFrame(hdu_list=[FakeCCDData(data=make_test_bpm(image.data.shape[1] + 1, - image.data.shape[0]))]) + image.data.shape[0], make_3d=True))]) tester = BadPixelMaskLoader(FakeContext()) assert tester.do_stage(image) is None @@ -95,7 +95,7 @@ def test_removes_image_wrong_shape_3d(mock_get_bpm_filename, mock_bpm, set_rando image = FakeLCOObservationFrame(hdu_list=[FakeCCDData(memmap=False)]) master_image = FakeLCOObservationFrame( hdu_list=[FakeCCDData(data=bpm_data, memmap=False) for bpm_data in make_test_bpm(image.data.shape[1] + 1, - image.data.shape[0], make_3d=True)], + image.data.shape[0], make_3d=False)], file_path='test.fits') mock_bpm.return_value = master_image tester = BadPixelMaskLoader(FakeContext()) From 8b2ab4d36d7d021e20fc51c112f3f416ec7f5803 Mon Sep 17 00:00:00 2001 From: Curtis McCully Date: Fri, 28 Aug 2020 13:24:11 -0400 Subject: [PATCH 3/4] Increased the timeout on the sqlalchemy db for testing. --- banzai/dbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/banzai/dbs.py b/banzai/dbs.py index 88acc7b76..f849ababd 100755 --- a/banzai/dbs.py +++ b/banzai/dbs.py @@ -36,7 +36,7 @@ def get_session(db_address): session: SQLAlchemy Database Session """ # Build a new engine for each session. This makes things thread safe. - engine = create_engine(db_address, poolclass=pool.NullPool) + engine = create_engine(db_address, poolclass=pool.NullPool, connect_args={'timeout': 30}) Base.metadata.bind = engine # We don't use autoflush typically. I have run into issues where SQLAlchemy would try to flush From 36174c5b59a0fd0dbd8e4e438101da266d6ce7a6 Mon Sep 17 00:00:00 2001 From: Curtis McCully Date: Mon, 31 Aug 2020 16:56:41 -0400 Subject: [PATCH 4/4] Fix to WCS on data cube images. --- banzai/dbs.py | 11 ++++++----- banzai/lco.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/banzai/dbs.py b/banzai/dbs.py index f849ababd..2cc4df4d0 100755 --- a/banzai/dbs.py +++ b/banzai/dbs.py @@ -11,7 +11,6 @@ import logging import datetime from dateutil.parser import parse -import copy import numpy as np import requests from sqlalchemy import create_engine, pool, type_coerce, cast @@ -36,7 +35,10 @@ def get_session(db_address): session: SQLAlchemy Database Session """ # Build a new engine for each session. This makes things thread safe. - engine = create_engine(db_address, poolclass=pool.NullPool, connect_args={'timeout': 30}) + if 'sqlite' in db_address: + engine = create_engine(db_address, poolclass=pool.NullPool, connect_args={'timeout': 30}) + else: + engine = create_engine(db_address) Base.metadata.bind = engine # We don't use autoflush typically. I have run into issues where SQLAlchemy would try to flush @@ -449,8 +451,7 @@ def populate_instrument_tables(db_address, configdb_address): added to the network. """ sites, instruments = parse_configdb(configdb_address=configdb_address) - with get_session(db_address=db_address) as db_session: - for site in sites: - add_site(site, db_address) + for site in sites: + add_site(site, db_address) for instrument in instruments: add_instrument(instrument, db_address) diff --git a/banzai/lco.py b/banzai/lco.py index 0769d4134..6b86873fa 100644 --- a/banzai/lco.py +++ b/banzai/lco.py @@ -560,7 +560,7 @@ def _munge_data_cube(hdu): primary_hdu_gain = hdu.header['GAIN'] gain_comment = hdu.header.comments['GAIN'] hdu.header.remove('GAIN') - hdu_list = [HeaderOnly(meta=hdu.header)] + hdu_list = [HeaderOnly(meta=fits_utils.sanitize_header(hdu.header))] # We need to properly set the datasec and detsec keywords in case we didn't read out the # middle row (the "Missing Row Problem"). sinistro_datasecs = {'missing': ['[1:2048,1:2048]', '[1:2048,1:2048]',