Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PERF use more numba #226

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 26 additions & 10 deletions ngmix/prepsfmom.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,23 @@ def _measure_moments_fft(
cen_phase = _compute_cen_phase_shift(drow, dcol, dim, msk=msk)
kim *= cen_phase

# we only sum where the kernel is nonzero
fkf = kernels["fkf"]
fkr = kernels["fkr"]
fkp = kernels["fkp"]
fkc = kernels["fkc"]

mom_norm = kernels["fk00"]
return _numba_bits(
kim, fkf, fkr, fkp, fkc, eff_pad_factor, kpsf_im, mom_norm, tot_var, dim,
)


@njit
def _numba_bits(
kim, fkf, fkr, fkp, fkc, eff_pad_factor, kpsf_im, mom_norm, tot_var, dim,
):

# build the flux, radial, plus and cross kernels / moments
# the inverse FFT in our convention has a factor of 1/n per dimension
# the sums below are inverse FFTs but only computing the values at the
Expand All @@ -285,13 +302,6 @@ def _measure_moments_fft(
df2 = df * df
df4 = df2 * df2

# we only sum where the kernel is nonzero
fkf = kernels["fkf"]
fkr = kernels["fkr"]
fkp = kernels["fkp"]
fkc = kernels["fkc"]

mom_norm = kernels["fk00"]
mf = np.sum((kim * fkf).real) * df2
mr = np.sum((kim * fkr).real) * df2
mp = np.sum((kim * fkp).real) * df2
Expand Down Expand Up @@ -397,10 +407,16 @@ def _compute_cen_phase_shift(cen_row, cen_col, dim, msk=None):
fx = f.reshape(1, -1)
fy = f.reshape(-1, 1)
kcen = fy*cen_row + fx*cen_col

if msk is not None:
return np.cos(kcen[msk]) + 1j*np.sin(kcen[msk])
else:
return np.cos(kcen) + 1j*np.sin(kcen)
kcen = kcen[msk]

return _comp_phase(kcen)


@njit
def _comp_phase(kcen):
    """Return the complex phase factor cos(kcen) + i*sin(kcen).

    Parameters
    ----------
    kcen : array-like
        Phase angles; the cosine and sine are evaluated element-wise
        through the numpy ufuncs.

    Returns
    -------
    phase : complex array
        The value ``cos(kcen) + 1j*sin(kcen)`` for each element of `kcen`.
    """
    # real and imaginary parts are computed separately and then combined
    real_part = np.cos(kcen)
    imag_part = np.sin(kcen)
    return real_part + 1j * imag_part


def _zero_pad_and_compute_fft_impl(im, cen_row, cen_col, target_dim, ap_rad):
Expand Down
2 changes: 1 addition & 1 deletion ngmix/tests/test_prepsfmom.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def test_prepsfmom_speed_and_cache():
assert _zero_pad_and_compute_fft_cached_impl.cache_info().misses == 4

# now we test with full caching
nfit = 1000
nfit = 2000
dt = time.time()
for _ in range(nfit):
with obs.writeable():
Expand Down