From e8e847f4146a31431da377c9e572cecb9cb61b4d Mon Sep 17 00:00:00 2001 From: "Matthew R. Becker" Date: Tue, 23 Aug 2022 11:36:22 -0500 Subject: [PATCH 1/5] Update prepsfmom.py --- ngmix/prepsfmom.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ngmix/prepsfmom.py b/ngmix/prepsfmom.py index 4604e1f7..a3d293c4 100644 --- a/ngmix/prepsfmom.py +++ b/ngmix/prepsfmom.py @@ -92,14 +92,14 @@ def _meas(self, obs, psf_obs, return_kernels): eff_pad_factor = target_dim / obs.image.shape[0] # pad image, psf and weight map, get FFTs, apply cen_phases - kim, im_row, im_col = _zero_pad_and_compute_fft_cached( + kim, im_row, im_col = _zero_pad_and_compute_fft_impl( obs.image, obs.jacobian.row0, obs.jacobian.col0, target_dim, self.ap_rad, ) fft_dim = kim.shape[0] if psf_obs is not None: - kpsf_im, psf_im_row, psf_im_col = _zero_pad_and_compute_fft_cached( + kpsf_im, psf_im_row, psf_im_col = _zero_pad_and_compute_fft_impl( psf_obs.image, psf_obs.jacobian.row0, psf_obs.jacobian.col0, target_dim, From aaa2e56e2abbc673d1ae31037ac1e7b77d85f1e1 Mon Sep 17 00:00:00 2001 From: beckermr Date: Tue, 23 Aug 2022 11:49:44 -0500 Subject: [PATCH 2/5] try this --- ngmix/prepsfmom.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ngmix/prepsfmom.py b/ngmix/prepsfmom.py index a3d293c4..8a39af5f 100644 --- a/ngmix/prepsfmom.py +++ b/ngmix/prepsfmom.py @@ -292,6 +292,15 @@ def _measure_moments_fft( fkc = kernels["fkc"] mom_norm = kernels["fk00"] + return _numba_bits( + kim, fkf, fkr, fkp, fkc, eff_pad_factor, kpsf_im, mom_norm, tot_var, df2, df4 + ) + + +@njit +def _numba_bits( + kim, fkf, fkr, fkp, fkc, eff_pad_factor, kpsf_im, mom_norm, tot_var, df2, df4 +): mf = np.sum((kim * fkf).real) * df2 mr = np.sum((kim * fkr).real) * df2 mp = np.sum((kim * fkp).real) * df2 From 3f9b6bd1951ab109d99a41ef9824fde5806b5a13 Mon Sep 17 00:00:00 2001 From: beckermr Date: Tue, 23 Aug 2022 11:53:25 -0500 Subject: [PATCH 3/5] more --- ngmix/prepsfmom.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ngmix/prepsfmom.py b/ngmix/prepsfmom.py index 8a39af5f..849ff927 100644 --- a/ngmix/prepsfmom.py +++ b/ngmix/prepsfmom.py @@ -92,14 +92,14 @@ def _meas(self, obs, psf_obs, return_kernels): eff_pad_factor = target_dim / obs.image.shape[0] # pad image, psf and weight map, get FFTs, apply cen_phases - kim, im_row, im_col = _zero_pad_and_compute_fft_impl( + kim, im_row, im_col = _zero_pad_and_compute_fft_cached( obs.image, obs.jacobian.row0, obs.jacobian.col0, target_dim, self.ap_rad, ) fft_dim = kim.shape[0] if psf_obs is not None: - kpsf_im, psf_im_row, psf_im_col = _zero_pad_and_compute_fft_impl( + kpsf_im, psf_im_row, psf_im_col = _zero_pad_and_compute_fft_cached( psf_obs.image, psf_obs.jacobian.row0, psf_obs.jacobian.col0, target_dim, From e80ba16e9c64247066c5aa53ba97d7e7b242c80a Mon Sep 17 00:00:00 2001 From: beckermr Date: Tue, 23 Aug 2022 15:07:39 -0500 Subject: [PATCH 4/5] PERF more numba --- ngmix/prepsfmom.py | 49 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/ngmix/prepsfmom.py b/ngmix/prepsfmom.py index 849ff927..7ab266a6 100644 --- a/ngmix/prepsfmom.py +++ b/ngmix/prepsfmom.py @@ -402,14 +402,51 @@ def _compute_cen_phase_shift(cen_row, cen_col, dim, msk=None): of that profile will result in an FFT centered at the profile. """ f = fft.fftfreq(dim) * (2.0 * np.pi) - # this reshaping makes sure the arrays broadcast nicely into a grid - fx = f.reshape(1, -1) - fy = f.reshape(-1, 1) - kcen = fy*cen_row + fx*cen_col if msk is not None: - return np.cos(kcen[msk]) + 1j*np.sin(kcen[msk]) + _msk = np.ravel(msk).astype(int) + return _comp_phase_loop_msk(cen_row, cen_col, f, _msk, np.sum(_msk)) else: - return np.cos(kcen) + 1j*np.sin(kcen) + return _comp_phase_double_loop(cen_row, cen_col, f) + + # old code not optimized + # # this reshaping makes sure the arrays broadcast nicely into a grid + # fx = f.reshape(1, -1) + # fy = f.reshape(-1, 1) + # kcen = fy*cen_row + fx*cen_col + # + # if msk is not None: + # kcen = kcen[msk] + # + # return np.cos(kcen) + 1j*np.sin(kcen) + + +@njit +def _comp_phase_loop_msk(cen_row, cen_col, f, msk, tot): + n = f.shape[0] + res = np.zeros(tot, dtype=np.cdouble) + loc = 0 + rloc = 0 + for j in range(n): + row_fac = f[j] * cen_row + for i in range(n): + if msk[loc]: + kcen = f[i] * cen_col + row_fac + res[rloc] = np.cos(kcen) + 1j*np.sin(kcen) + rloc += 1 + loc += 1 + return res + + +@njit +def _comp_phase_double_loop(cen_row, cen_col, f): + n = f.shape[0] + res = np.zeros((n, n), dtype=np.cdouble) + for j in range(n): + row_fac = f[j] * cen_row + for i in range(n): + kcen = f[i] * cen_col + row_fac + res[j, i] = np.cos(kcen) + 1j*np.sin(kcen) + return res def _zero_pad_and_compute_fft_impl(im, cen_row, cen_col, target_dim, ap_rad): From 4863e2559e294700fa04bb9c6d1c13462acc2942 Mon Sep 17 00:00:00 2001 From: beckermr Date: Tue, 23 Aug 2022 18:17:45 -0500 Subject: [PATCH 5/5] PERF this works a touch better --- ngmix/prepsfmom.py | 72 ++++++++++------------------------- ngmix/tests/test_prepsfmom.py | 2 +- 2 files changed, 22 insertions(+), 52 deletions(-) diff --git a/ngmix/prepsfmom.py b/ngmix/prepsfmom.py index 7ab266a6..c015e9bc 100644 --- a/ngmix/prepsfmom.py +++ b/ngmix/prepsfmom.py @@ -276,15 +276,6 @@ def _measure_moments_fft( cen_phase = _compute_cen_phase_shift(drow, dcol, dim, msk=msk) kim *= cen_phase - # build the flux, radial, plus and cross kernels / moments - # the inverse FFT in our convention has a factor of 1/n per dimension - # the sums below are inverse FFTs but only computing the values at the - # real-space center of the object (0 in our coordinate system). - # thus we code the factor of 1/n by hand - df = 1/dim - df2 = df * df - df4 = df2 * df2 - # we only sum where the kernel is nonzero fkf = kernels["fkf"] fkr = kernels["fkr"] @@ -293,14 +284,24 @@ def _measure_moments_fft( mom_norm = kernels["fk00"] return _numba_bits( - kim, fkf, fkr, fkp, fkc, eff_pad_factor, kpsf_im, mom_norm, tot_var, df2, df4 + kim, fkf, fkr, fkp, fkc, eff_pad_factor, kpsf_im, mom_norm, tot_var, dim, ) @njit def _numba_bits( - kim, fkf, fkr, fkp, fkc, eff_pad_factor, kpsf_im, mom_norm, tot_var, df2, df4 + kim, fkf, fkr, fkp, fkc, eff_pad_factor, kpsf_im, mom_norm, tot_var, dim, ): + + # build the flux, radial, plus and cross kernels / moments + # the inverse FFT in our convention has a factor of 1/n per dimension + # the sums below are inverse FFTs but only computing the values at the + # real-space center of the object (0 in our coordinate system). + # thus we code the factor of 1/n by hand + df = 1/dim + df2 = df * df + df4 = df2 * df2 + mf = np.sum((kim * fkf).real) * df2 mr = np.sum((kim * fkr).real) * df2 mp = np.sum((kim * fkp).real) * df2 @@ -402,51 +403,20 @@ def _compute_cen_phase_shift(cen_row, cen_col, dim, msk=None): of that profile will result in an FFT centered at the profile. """ f = fft.fftfreq(dim) * (2.0 * np.pi) - if msk is not None: - _msk = np.ravel(msk).astype(int) - return _comp_phase_loop_msk(cen_row, cen_col, f, _msk, np.sum(_msk)) - else: - return _comp_phase_double_loop(cen_row, cen_col, f) - - # old code not optimized - # # this reshaping makes sure the arrays broadcast nicely into a grid - # fx = f.reshape(1, -1) - # fy = f.reshape(-1, 1) - # kcen = fy*cen_row + fx*cen_col - # - # if msk is not None: - # kcen = kcen[msk] - # - # return np.cos(kcen) + 1j*np.sin(kcen) + # this reshaping makes sure the arrays broadcast nicely into a grid + fx = f.reshape(1, -1) + fy = f.reshape(-1, 1) + kcen = fy*cen_row + fx*cen_col + if msk is not None: + kcen = kcen[msk] -@njit -def _comp_phase_loop_msk(cen_row, cen_col, f, msk, tot): - n = f.shape[0] - res = np.zeros(tot, dtype=np.cdouble) - loc = 0 - rloc = 0 - for j in range(n): - row_fac = f[j] * cen_row - for i in range(n): - if msk[loc]: - kcen = f[i] * cen_col + row_fac - res[rloc] = np.cos(kcen) + 1j*np.sin(kcen) - rloc += 1 - loc += 1 - return res + return _comp_phase(kcen) @njit -def _comp_phase_double_loop(cen_row, cen_col, f): - n = f.shape[0] - res = np.zeros((n, n), dtype=np.cdouble) - for j in range(n): - row_fac = f[j] * cen_row - for i in range(n): - kcen = f[i] * cen_col + row_fac - res[j, i] = np.cos(kcen) + 1j*np.sin(kcen) - return res +def _comp_phase(kcen): + return np.cos(kcen) + 1j*np.sin(kcen) def _zero_pad_and_compute_fft_impl(im, cen_row, cen_col, target_dim, ap_rad): diff --git a/ngmix/tests/test_prepsfmom.py b/ngmix/tests/test_prepsfmom.py index 7b4cdd77..6488b9a0 100644 --- a/ngmix/tests/test_prepsfmom.py +++ b/ngmix/tests/test_prepsfmom.py @@ -211,7 +211,7 @@ def test_prepsfmom_speed_and_cache(): assert _zero_pad_and_compute_fft_cached_impl.cache_info().misses == 4 # now we test with full caching - nfit = 1000 + nfit = 2000 dt = time.time() for _ in range(nfit): with obs.writeable():