Merge pull request macauff#81 from macauff/new_source_counts_model_correction

New source counts model correction
Onoddil authored Apr 18, 2024
2 parents 9163236 + 403f3f4 commit ef85a29
Showing 13 changed files with 225 additions and 57 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
@@ -16,6 +16,10 @@ Bug Fixes
API Changes
^^^^^^^^^^^

- ``CrossMatch`` now expects ``saturation_magnitudes`` as an input parameter in
its input files if ``fit_gal_flag`` or ``correct_astrometry`` is ``True``. [#81]

- ``AstrometricCorrections`` accepts pre-computed TRILEGAL histograms, following
the expansion of ``CrossMatch`` accepting them. [#79]

6 changes: 5 additions & 1 deletion docs/inputs.rst
@@ -184,7 +184,7 @@ the inputs required if either ``correct_astrometry`` or ``compute_snr_mag_relati

and the inputs required if ``correct_astrometry`` is ``True``:

``best_mag_index``, ``nn_radius``, ``ref_csv_cat_file_string``, ``correct_mag_array``, ``correct_mag_slice``, ``correct_sig_slice``, ``chunk_overlap_col``, and ``best_mag_index_col``.
``best_mag_index``, ``nn_radius``, ``ref_csv_cat_file_string``, ``correct_mag_array``, ``correct_mag_slice``, ``correct_sig_slice``, ``chunk_overlap_col``, ``best_mag_index_col``, and ``saturation_magnitudes``.

.. note::
``run_fw_auf``, ``run_psf_auf``, ``psf_fwhms``, ``snr_mag_params_path``, ``download_tri``, ``tri_set_name``, ``tri_filt_names``, ``tri_filt_num``, ``tri_maglim_faint``, ``tri_num_faint``, ``dens_dist``, ``dd_params_path``, ``l_cut_path``, ``gal_wavs``, ``gal_zmax``, ``gal_nzs``, ``gal_aboffsets``, ``gal_filternames``, and ``gal_al_avs`` are all currently required if ``correct_astrometry`` is ``True``, bypassing the nested flags above. For example, ``dens_dist`` is required as an input if ``include_perturb_auf`` is ``True``, or if ``correct_astrometry`` is set. This means that ``AstrometricCorrections`` implicitly always runs and fits for a full Astrometric Uncertainty Function.
@@ -426,6 +426,10 @@ The zero-indexed integer column number in the original input csv file used in ``

Boolean flag indicating whether the astrometric or photometric uncertainties of the input catalogue should be used to derive the astrometric uncertainties from ensemble statistics in ``AstrometricCorrections``.

``saturation_magnitudes``

A list, one float per filter, giving the magnitude at which the telescope or survey saturates in that filter, used to filter source counts for model fitting in ``AstrometricCorrections``.
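For example, for a hypothetical three-filter catalogue the corresponding entry in the catalogue parameters file would be a space-separated list of floats, one per filter (values illustrative only)::

    saturation_magnitudes = 14.5 14.8 15.1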


Parameter Dependency Graph
==========================
2 changes: 1 addition & 1 deletion src/.pylintrc
@@ -188,7 +188,7 @@ good-names=i,

# Good variable names regexes, separated by a comma. If names match any regex,
# they will always be accepted
good-names-rgxs=
good-names-rgxs=^[_a-zA-Z][_a-z0-9A-Z]?$

# Include a hint for the correct naming format with invalid-name.
include-naming-hint=no
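
As a quick illustration (example names arbitrary), the new regex accepts any one- or two-character identifier, so pylint no longer flags the short mathematical variable names used throughout this codebase:

import re

# Accepts names of one or two characters starting with a letter or underscore.
pattern = re.compile(r'^[_a-zA-Z][_a-z0-9A-Z]?$')
for name in ['i', 'dx', 'p0', '_f', 'abc']:
    print(name, bool(pattern.match(name)))  # all True except 'abc'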
2 changes: 1 addition & 1 deletion src/macauff/derive_psf_auf_params.py
@@ -20,7 +20,7 @@

# Assume that usetex = False only applies for tests where no TeX is installed
# at all, instead of users having half-installed TeX, dvipng et al. somewhere.
usetex = not not shutil.which("tex") # pylint: disable=unnecessary-negation
usetex = not not shutil.which("tex") # pylint: disable=unneeded-not
if usetex:
plt.rcParams.update({"text.usetex": True, "text.latex.preamble": r"\usepackage{amsmath}"})

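For context, ``shutil.which`` returns the executable's full path, or ``None`` when it is absent, so the double negation here is simply a boolean cast; a minimal standalone equivalent:

import shutil

# shutil.which("tex") gives a path string when TeX is on the PATH, else None;
# "not not" coerces that to a plain True/False.
usetex = not not shutil.which("tex")
assert usetex == bool(shutil.which("tex"))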
50 changes: 36 additions & 14 deletions src/macauff/fit_astrometry.py
@@ -24,14 +24,14 @@

# Assume that usetex = False only applies for tests where no TeX is installed
# at all, instead of users having half-installed TeX, dvipng et al. somewhere.
usetex = not not shutil.which("tex") # pylint: disable=unnecessary-negation
usetex = not not shutil.which("tex") # pylint: disable=unneeded-not
if usetex:
plt.rcParams.update({"text.usetex": True, "text.latex.preamble": r"\usepackage{amsmath}"})

# pylint: disable=wrong-import-position,import-error,no-name-in-module
from macauff.galaxy_counts import create_galaxy_counts
from macauff.get_trilegal_wrapper import get_av_infinity
from macauff.misc_functions import min_max_lon
from macauff.misc_functions import find_model_counts_corrections, min_max_lon
from macauff.misc_functions_fortran import misc_functions_fortran as mff
from macauff.perturbation_auf import (
_calculate_magnitude_offsets,
@@ -56,8 +56,8 @@ def __init__(self, psf_fwhm, numtrials, nn_radius, dens_search_radius, save_fold
gal_wav_micron, gal_ab_offset, gal_filtname, gal_alav, dm, dd_params, l_cut,
ax1_mids, ax2_mids, ax_dimension, mag_array, mag_slice, sig_slice, n_pool,
npy_or_csv, coord_or_chunk, pos_and_err_indices, mag_indices, mag_unc_indices,
mag_names, best_mag_index, coord_system, pregenerate_cutouts, n_r, n_rho, max_rho,
trifolder=None, triname=None, maglim_f=None, magnum=None, tri_num_faint=None,
mag_names, best_mag_index, coord_system, saturation_magnitudes, pregenerate_cutouts, n_r,
n_rho, max_rho, trifolder=None, triname=None, maglim_f=None, magnum=None, tri_num_faint=None,
trifilterset=None, trifiltname=None, tri_hist=None, tri_mags=None, dtri_mags=None,
tri_uncert=None, use_photometric_uncertainties=False, cutout_area=None, cutout_height=None,
single_sided_auf=True, chunks=None, return_nm=False):
@@ -172,6 +172,9 @@ def __init__(self, psf_fwhm, numtrials, nn_radius, dens_search_radius, save_fold
Identifier of which coordinate system the data are in. Both datasets
must be in the same system, which can either be RA/Dec (equatorial)
or l/b (galactic) coordinates.
saturation_magnitudes : list or numpy.ndarray
List of magnitudes, one element per filter in ``mag_indices``, brighter
than which the given survey saturates.
pregenerate_cutouts : boolean or None
Indicates whether sightline catalogues must have been pre-made,
or whether they can be generated by ``AstrometricCorrections``
@@ -339,6 +342,8 @@ def __init__(self, psf_fwhm, numtrials, nn_radius, dens_search_radius, save_fold
self.mag_slice = np.array(mag_slice)
self.sig_slice = np.array(sig_slice)

self.saturation_magnitudes = np.array(saturation_magnitudes)

self.npy_or_csv = npy_or_csv
self.coord_or_chunk = coord_or_chunk
self.chunks = chunks
@@ -944,7 +949,7 @@ def fit_snr_sqrt(p, x, y):

return res, s_bins, s_d_snr_med, s_d_snr_dmed, snr_med, snr_dmed

def make_star_galaxy_counts(self):
def make_star_galaxy_counts(self): # pylint: disable=too-many-locals
"""
Generate differential source counts for each cutout region, simulating
both stars and galaxies.
@@ -1043,7 +1048,27 @@ def make_star_galaxy_counts(self):
self.gal_wav_micron, self.gal_alpha0, self.gal_alpha1, self.gal_alphaweight,
self.gal_ab_offset, self.gal_filtname, self.gal_alav*avs)

log10y = np.log10(tri_hist + gal_dns)
mag_ind = self.mag_indices[self.best_mag_index]
data_mags = self.b[~np.isnan(self.b[:, mag_ind]), mag_ind]
data_hist, data_bins = np.histogram(data_mags, bins='auto')
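# Keep only bins containing more than three sources; after this cut,
# data_bins holds the retained bins' left edges and data_dbins their
# widths, so bin centres are recovered later as data_bins + data_dbins/2.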
d_hc = np.where(data_hist > 3)[0]
data_hist = data_hist[d_hc]
data_dbins = np.diff(data_bins)[d_hc]
data_bins = data_bins[d_hc]

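# Area of the rectangular cutout on the sphere in square degrees:
# (longitude extent in degrees) * (difference of sine-latitudes) * 180/pi,
# used to normalise the counts to per magnitude per square degree.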
rect_area = (ax1_max - (ax1_min)) * (
np.sin(np.radians(ax2_max)) - np.sin(np.radians(ax2_min))) * 180/np.pi

data_uncert = np.sqrt(data_hist) / data_dbins / rect_area
data_hist = data_hist / data_dbins / rect_area
data_loghist = np.log10(data_hist)
data_dloghist = 1/np.log(10) * data_uncert / data_hist

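# Fit the model corrections only to bins between the filter's saturation
# magnitude and the fitting limit maxmag, excluding saturated bright-end
# counts from the comparison.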
q = (data_bins <= maxmag) & (data_bins >= self.saturation_magnitudes[self.best_mag_index])
tri_corr, gal_corr = find_model_counts_corrections(data_loghist[q], data_dloghist[q],
data_bins[q]+data_dbins[q]/2, tri_hist, gal_dns,
tri_mags+dtri_mags/2)
log10y = np.log10(tri_hist * tri_corr + gal_dns * gal_corr)
new_uncert = np.sqrt(tri_uncert**2 + (0.05*gal_dns)**2)
dlog10y = 1/np.log(10) * new_uncert / (tri_hist + gal_dns)

@@ -1053,6 +1078,7 @@ def make_star_galaxy_counts(self):
self.tri_hist, self.tri_mags, self.dtri_mags = tri_hist, tri_mags, dtri_mags
self.tri_uncert, self.gal_dns = tri_uncert, gal_dns
self.minmag, self.maxmag, self.n_norm = minmag, maxmag, n_norm
self.tri_corr, self.gal_corr = tri_corr, gal_corr

def plot_star_galaxy_counts(self):
"""
@@ -1075,16 +1101,12 @@ def plot_star_galaxy_counts(self):

mag_ind = self.mag_indices[self.best_mag_index]
data_mags = self.b[~np.isnan(self.b[:, mag_ind]), mag_ind]
# Correction to model is the ratio of data counts per unit area
# to model source density.
correction = np.sum((data_mags >= self.minmag) &
(data_mags <= self.maxmag)) / rect_area / self.n_norm

ax = plt.subplot(gs[0])
ax1_name = 'l' if self.coord_system == 'galactic' else 'RA'
ax2_name = 'b' if self.coord_system == 'galactic' else 'Dec'
ax.set_title(f'{ax1_name} = {ax1_mid}, {ax2_name} = {ax2_mid}')
ax.errorbar(self.tri_mags+self.dtri_mags/2, self.log10y + np.log10(correction),
ax.errorbar(self.tri_mags+self.dtri_mags/2, self.log10y,
yerr=self.dlog10y, c='k', marker='.', zorder=1, ls='None')

data_hist, data_bins = np.histogram(data_mags, bins='auto')
@@ -1103,10 +1125,10 @@ def plot_star_galaxy_counts(self):
lims = ax.get_ylim()
q = self.tri_hist > 0
ax.plot((self.tri_mags+self.dtri_mags/2)[q], np.log10(self.tri_hist[q]) +
np.log10(correction), 'b--')
np.log10(self.tri_corr), 'k--')
q = self.gal_dns > 0
ax.plot((self.tri_mags+self.dtri_mags/2)[q], np.log10(self.gal_dns[q]) +
np.log10(correction), 'b:')
np.log10(self.gal_corr), 'k:')
ax.set_ylim(*lims)

ax.set_xlabel('Magnitude')
@@ -1786,7 +1808,7 @@ def _get_cart_kdt(coord):
mag_cut_ucoords = mag_cut_cat.realize_frame(mag_cut_urepr)
mag_cut_kdt = _get_cart_kdt(mag_cut_ucoords)

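# An angular search radius theta corresponds to a unit-sphere chord length
# of 2*sin(theta/2), so the KD-tree query runs in 3-D Cartesian space with
# this radius.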
r = (2 * np.sin(Angle(search_radius * u.degree) / 2.0)).value
r = (2 * np.sin(Angle(search_radius * u.degree) / 2.0)).value # pylint: disable=no-member
overlap_number = np.empty(len(b), int)

counter = np.arange(0, len(b))
24 changes: 12 additions & 12 deletions src/macauff/matching.py
@@ -188,11 +188,11 @@ def _initialise_chunk(self, joint_file_path, cat_a_file_path, cat_b_file_path):
self.a_correct_mag_slice, self.a_correct_sig_slice, self.n_pool, a_npy_or_csv,
a_coord_or_chunk, self.a_pos_and_err_indices, self.a_mag_indices, self.a_mag_unc_indices,
self.a_filt_names, self.a_best_mag_index, self.a_auf_region_frame,
trifolder=self.a_auf_folder_path, triname=a_correct_astro_tri_name,
maglim_f=self.a_tri_maglim_faint, magnum=self.a_tri_filt_num,
tri_num_faint=self.a_tri_num_faint, trifilterset=self.a_tri_set_name,
trifiltname=self.a_tri_filt_names[acbi], tri_hist=self.a_dens_hist_tri_list[acbi],
tri_mags=self.a_tri_model_mags_list[acbi],
self.a_saturation_magnitudes, trifolder=self.a_auf_folder_path,
triname=a_correct_astro_tri_name, maglim_f=self.a_tri_maglim_faint,
magnum=self.a_tri_filt_num, tri_num_faint=self.a_tri_num_faint,
trifilterset=self.a_tri_set_name, trifiltname=self.a_tri_filt_names[acbi],
tri_hist=self.a_dens_hist_tri_list[acbi], tri_mags=self.a_tri_model_mags_list[acbi],
dtri_mags=self.a_tri_model_mags_interval_list[acbi],
tri_uncert=self.a_tri_dens_uncert_list[acbi],
use_photometric_uncertainties=self.a_use_photometric_uncertainties, pregenerate_cutouts=True,
@@ -260,11 +260,11 @@ def _initialise_chunk(self, joint_file_path, cat_a_file_path, cat_b_file_path):
self.b_correct_mag_slice, self.b_correct_sig_slice, self.n_pool, b_npy_or_csv,
b_coord_or_chunk, self.b_pos_and_err_indices, self.b_mag_indices, self.b_mag_unc_indices,
self.b_filt_names, self.b_best_mag_index, self.b_auf_region_frame,
trifolder=self.b_auf_folder_path, triname=b_correct_astro_tri_name,
maglim_f=self.b_tri_maglim_faint, magnum=self.b_tri_filt_num,
tri_num_faint=self.b_tri_num_faint, trifilterset=self.b_tri_set_name,
trifiltname=self.b_tri_filt_names[bcbi], tri_hist=self.b_dens_hist_tri_list[bcbi],
tri_mags=self.b_tri_model_mags_list[bcbi],
self.b_saturation_magnitudes, trifolder=self.b_auf_folder_path,
triname=b_correct_astro_tri_name, maglim_f=self.b_tri_maglim_faint,
magnum=self.b_tri_filt_num, tri_num_faint=self.b_tri_num_faint,
trifilterset=self.b_tri_set_name, trifiltname=self.b_tri_filt_names[bcbi],
tri_hist=self.b_dens_hist_tri_list[bcbi], tri_mags=self.b_tri_model_mags_list[bcbi],
dtri_mags=self.b_tri_model_mags_interval_list[bcbi],
tri_uncert=self.b_tri_dens_uncert_list[bcbi],
use_photometric_uncertainties=self.b_use_photometric_uncertainties,
@@ -1113,12 +1113,12 @@ def read_metadata(self):
fit_gal_flag = self.b_fit_gal_flag
if correct_astro or fit_gal_flag:
for check_flag in ['gal_wavs', 'gal_zmax', 'gal_nzs',
'gal_aboffsets', 'gal_filternames']:
'gal_aboffsets', 'gal_filternames', 'saturation_magnitudes']:
if check_flag not in config:
raise ValueError(f"Missing key {check_flag} from catalogue {catname} "
"metadata file.")
# Set all lists of floats
for var in ['gal_wavs', 'gal_zmax', 'gal_aboffsets']:
for var in ['gal_wavs', 'gal_zmax', 'gal_aboffsets', 'saturation_magnitudes']:
a = config[var].split(' ')
try:
b = np.array([float(f) for f in a])
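
A minimal sketch of the list-of-floats parsing performed above, assuming a hypothetical metadata entry ``saturation_magnitudes = 14.5 14.8 15.1``:

import numpy as np

# config['saturation_magnitudes'] holds the space-separated values.
entry = "14.5 14.8 15.1"
saturation_magnitudes = np.array([float(f) for f in entry.split(' ')])
# -> array([14.5, 14.8, 15.1]); a ValueError from float() indicates a
# malformed entry.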
78 changes: 78 additions & 0 deletions src/macauff/misc_functions.py
@@ -5,6 +5,7 @@
'''

import numpy as np
from scipy.optimize import minimize

__all__ = []

@@ -312,3 +313,80 @@ def min_max_lon(a):
# Otherwise, the limits are inside [0, 360] and should be returned
# as the "normal" minimum and maximum values.
return min_lon, max_lon


# pylint: disable=unused-argument
def find_model_counts_corrections(data_loghist, data_dloghist, data_bin_mids, tri_hist,
gal_dns, tri_mag_mids):
'''
Derivation of two-parameter corrections to differential source counts, fitting
both stellar and galaxy components with separate scaling factors.

Parameters
----------
data_loghist : numpy.ndarray
Logarithmic differential source counts.
data_dloghist : numpy.ndarray
Uncertainties in log-counts from ``data_loghist``.
data_bin_mids : numpy.ndarray
Magnitudes of each bin corresponding to ``data_loghist``.
tri_hist : numpy.ndarray
Stellar model linear differential source counts.
gal_dns : numpy.ndarray
Galaxy model linear differential source counts.
tri_mag_mids : numpy.ndarray
Model magnitudes for each element of ``tri_hist`` and/or ``gal_dns``.

Returns
-------
res.x : numpy.ndarray
Value of least-squares fit scaling factors for stellar and galaxy
components of the differential source counts, as fit to the
input data.
'''
def lst_sq(p, y, o, t, g):
'''
Function evaluating the least-squares minimisation, and Jacobian,
of a fit to model and data differential source counts.

Parameters
----------
p : list
``a`` and ``b``, scaling factors for star and galaxy components
of the source counts.
y : numpy.ndarray
Data log-counts.
o : numpy.ndarray
Uncertainties corresponding to ``y``.
t : numpy.ndarray
Log-counts of stellar component of source counts.
g : numpy.ndarray
Log-counts of galaxy component of source counts.

Returns
-------
float
Chi-squared of the model source counts fit to the data.
list of floats
Gradient of the chi-squared, differentiated with respect to
the stellar and galaxy correction factors.
'''
a, b = p
f = np.log10(10**t * a + 10**g * b)
dfda = 10**t / (np.log(10) * (10**t * a + 10**g * b))
dfdb = 10**g / (np.log(10) * (10**t * a + 10**g * b))
dchida = np.sum(-2 * (y - f) / o**2 * dfda)
dchidb = np.sum(-2 * (y - f) / o**2 * dfdb)
return np.sum((y - f)**2 / o**2), [dchida, dchidb]

q = tri_hist > 0
tri_log_hist_at_data = np.interp(data_bin_mids, tri_mag_mids[q],
np.log10(tri_hist[q]), left=np.nan, right=np.nan)
q = gal_dns > 0
gal_log_hist_at_data = np.interp(data_bin_mids, tri_mag_mids[q],
np.log10(gal_dns[q]), left=np.nan, right=np.nan)
q = ~np.isnan(tri_log_hist_at_data) & ~np.isnan(gal_log_hist_at_data)
res = minimize(lst_sq, args=(data_loghist[q], np.ones_like(data_loghist[q]),
tri_log_hist_at_data[q], gal_log_hist_at_data[q]), x0=[1, 1], jac=True,
method='L-BFGS-B', options={'ftol': 1e-12}, bounds=[(0.01, None), (0.01, None)])
return res.x
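
For reference, the objective minimised by ``lst_sq`` and its analytic Jacobian, matching the code above, are

f_i = \log_{10}\!\left(a\,10^{t_i} + b\,10^{g_i}\right), \qquad
\chi^2 = \sum_i \frac{(y_i - f_i)^2}{\sigma_i^2}, \qquad
\frac{\partial \chi^2}{\partial a} = \sum_i \frac{-2\,(y_i - f_i)}{\sigma_i^2}
\cdot \frac{10^{t_i}}{\ln 10 \, \left(a\,10^{t_i} + b\,10^{g_i}\right)},

with the derivative with respect to ``b`` obtained by replacing 10^{t_i} with 10^{g_i} in the final numerator.

A minimal usage sketch with synthetic inputs (magnitude grid, count slopes, and scale factors all hypothetical); the fit should recover the factors used to build the fake data. Note that the function passes unit weights to the optimiser and so ignores ``data_dloghist``, hence the ``unused-argument`` pylint disable above:

import numpy as np
from macauff.misc_functions import find_model_counts_corrections

# Common magnitude grid for the models and the synthetic "data".
mags = np.linspace(12.0, 19.0, 30)
tri_hist = 10**(0.3 * (mags - 12.0))        # stellar model, linear counts
gal_dns = 0.2 * 10**(0.45 * (mags - 12.0))  # galaxy model, linear counts

# Fake data built from the models with known scale factors 2.0 and 0.5.
data_loghist = np.log10(2.0 * tri_hist + 0.5 * gal_dns)
data_dloghist = np.full_like(data_loghist, 0.05)  # ignored by the fit

tri_corr, gal_corr = find_model_counts_corrections(
    data_loghist, data_dloghist, mags, tri_hist, gal_dns, mags)
print(tri_corr, gal_corr)  # expect approximately 2.0 and 0.5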
