From 784521b366fbb104ceda36680f41584a780737b6 Mon Sep 17 00:00:00 2001 From: = Date: Tue, 18 Oct 2022 10:30:48 -0700 Subject: [PATCH] adding srv tests, testing add on catalogs, testing analysis_tools (this is all untested, wait a few days before actually pulling these changes) --- descqa/configs/srv_analysistools.yaml | 17 + descqa/configs/srv_colortest.yaml | 17 +- descqa/configs/srv_ngals.yaml | 46 ++- descqa/configs/srv_ngals_dc2.yaml | 25 ++ descqa/configs/srv_readiness_dc2.yaml | 75 +++++ descqa/configs/srv_readiness_dp02.yaml | 56 ++++ descqa/configs/srv_shear.yaml | 2 +- descqa/configs/srv_shear_dp02.yaml | 17 + descqa/parallel.py | 2 + descqa/readiness_test_synchronous.py | 19 +- descqa/srv_analysistools.py | 259 +++++++++++++++ descqa/srv_colortest.py | 428 ++++++++++++++++++------- descqa/srv_ngals.py | 46 ++- descqa/srv_quality.py | 346 ++++++++++++++++++++ descqa/srv_readiness_test.py | 9 + descqa/srv_shear.py | 5 + descqa/srv_truth.py | 346 ++++++++++++++++++++ descqa/srv_txpipe.py | 346 ++++++++++++++++++++ descqarun/master.py | 1 - run_master_parallel.sh | 17 +- run_master_slurm.sh | 6 +- setup.py | 2 +- 22 files changed, 1920 insertions(+), 167 deletions(-) create mode 100644 descqa/configs/srv_analysistools.yaml create mode 100644 descqa/configs/srv_ngals_dc2.yaml create mode 100644 descqa/configs/srv_readiness_dc2.yaml create mode 100644 descqa/configs/srv_readiness_dp02.yaml create mode 100644 descqa/configs/srv_shear_dp02.yaml create mode 100644 descqa/srv_analysistools.py create mode 100644 descqa/srv_quality.py create mode 100644 descqa/srv_truth.py create mode 100644 descqa/srv_txpipe.py diff --git a/descqa/configs/srv_analysistools.yaml b/descqa/configs/srv_analysistools.yaml new file mode 100644 index 00000000..a25d77f6 --- /dev/null +++ b/descqa/configs/srv_analysistools.yaml @@ -0,0 +1,17 @@ +subclass_name: srv_analysistools.TestMetric +description: 'test install of analysis tools' +included_by_default: true + +ra: 'ra' +dec: 'dec' +models: ['cModel'] + +bands: ['r','i'] + +catalog_filters: + + - filters: ['detect_isIsolated','extendedness==0','detect_isPrimary'] + +mag_lim: 26 +snr_lim: 5.0 +compare: True diff --git a/descqa/configs/srv_colortest.yaml b/descqa/configs/srv_colortest.yaml index 715b6b7b..f5504128 100644 --- a/descqa/configs/srv_colortest.yaml +++ b/descqa/configs/srv_colortest.yaml @@ -1,16 +1,17 @@ -subclass_name: srv_colortest.CheckFluxes -description: 'Check flags' +subclass_name: srv_colortest2.CheckFluxes +description: 'Check fluxes and magnitudes' included_by_default: true ra: 'ra' dec: 'dec' -cmodel: 'cModelFlux_' -cmodel_err: 'cModelFluxErr_' -cmodel_flag: 'cModelFlux_flag_' +models: ['calib','cModel','gaap'] -bands: - - bands: ['r','u','i'] +bands: ['r','u','i','g','z','y'] catalog_filters: - - filters: ['mag_r < 28', 'mag_i < 28'] + - filters: ['detect_isIsolated','extendedness==0','detect_isPrimary'] + +mag_lim: 26 +snr_lim: 5.0 +compare: True diff --git a/descqa/configs/srv_ngals.yaml b/descqa/configs/srv_ngals.yaml index 1d0006f2..fd351aa0 100644 --- a/descqa/configs/srv_ngals.yaml +++ b/descqa/configs/srv_ngals.yaml @@ -5,22 +5,52 @@ ra: 'ra' dec: 'dec' flags_to_check: - - quantities: 'psFlux_flag_u' + + #- quantities: 'deblend_skipped' + #kind: 'bool' + #flag_val: True + + # - quantities: 'refBand' + #kind: 'str' + #flag_val: 'r' + # + #- quantities: 'sky_object' + #kind: 'bool' + #flag_val: True + + - quantities: 'cModelFlux_flag_i' kind: 'bool' - flag_val: False + flag_val: True - - quantities: 'good' + - quantities: 
'cModelFlux_flag_r' kind: 'bool' - flag_val: False + flag_val: True + + #- quantities: 'shape_flag' + #kind: 'bool' + #flag_val: True - - quantities: 'I_flag' + #- quantities: 'xy_flag' + #kind: 'bool' + #flag_val: True + + - quantities: 'detect_isIsolated' kind: 'bool' - flag_val: False + flag_val: True - - quantities: 'cModelFlux_flag_r' + #- quantities: 'i_pixelFlags_bad' + #kind: 'bool' + #flag_val: True + + # - quantities: 'i_pixelFlags_interpolatedCenter' + #kind: 'bool' + #flag_val: True + + - quantities: 'good' kind: 'bool' flag_val: False + catalog_filters: - - filters: ['extendedness==1'] + - filters: ['extendedness==1','detect_isPrimary','mag_r < 25'] diff --git a/descqa/configs/srv_ngals_dc2.yaml b/descqa/configs/srv_ngals_dc2.yaml new file mode 100644 index 00000000..e13f3e16 --- /dev/null +++ b/descqa/configs/srv_ngals_dc2.yaml @@ -0,0 +1,25 @@ +subclass_name: srv_ngals.CheckNgals +description: 'Check flags' +included_by_default: true +ra: 'ra' +dec: 'dec' + +flags_to_check: + + - quantities: 'cModelFlux_flag_i' + kind: 'bool' + flag_val: True + + - quantities: 'cModelFlux_flag_r' + kind: 'bool' + flag_val: True + + - quantities: 'good' + kind: 'bool' + flag_val: False + + +catalog_filters: + + - filters: ['mag_r < 25'] + diff --git a/descqa/configs/srv_readiness_dc2.yaml b/descqa/configs/srv_readiness_dc2.yaml new file mode 100644 index 00000000..cc1bcbf9 --- /dev/null +++ b/descqa/configs/srv_readiness_dc2.yaml @@ -0,0 +1,75 @@ +subclass_name: readiness_test_synchronous.CheckQuantities +description: 'Plot histograms of listed quantities and perform range, finiteness, mean and standard deviation checks.' +included_by_default: true + +subset_size: 1000000 + +quantities_to_check: + - quantities: ['ra', 'dec'] + kind: 'double' + label: 'deg' + min: [-2.51, -2.49] + max: [2.49, 2.51] + f_inf: 0 + f_nan: 0 + f_outlier: 0 + + - quantities: ['cModelFlux_i','cModelFlux_r','cModelFlux_g'] + kind: 'double' + label: 'cModelFlux (i,r,g)' + min: [-0.2, 0] + max: [0, 0.2] + f_inf: 0 + log: true + f_nan: 0 + f_outlier: 0 + + - quantities: ['psFlux_i', 'psFlux_r','psFlux_g'] + kind: 'double' + label: 'psFlux (i,r,g)' + min: [0, 0.001] + max: [179.99, 180] + f_inf: 0 + log: true + f_nan: 0 + f_outlier: 0 + + - quantities: ['mag_i','mag_r','mag_g'] + kind: 'double' + label: 'mag_x (x=i,r,g)' + min: [null, 15] + max: [25, null] + f_inf: 0 + f_nan: 0 + f_outlier: 0 + + - quantities: 'blendedness' + kind: 'double' + label: 'blendedness' + min: [NULL, NULL] + max: [NULL, NULL] + f_inf: 0 + f_nan: 0 + f_outlier: 0 + + + - quantities: 'psf_fwhm_r' + kind: 'double' + label: 'FWHM of r band' + min: [NULL, NULL] + max: [NULL, NULL] + f_inf: 0 + f_nan: 0 + f_outlier: 0 + + + - quantities: ['shear_1', 'shear_2'] + kind: 'double' + label: 'shear_12' + min: [null, -0.1] + max: [0.1, null] + f_inf: 0 + f_nan: 0 + f_outlier: 0 + + diff --git a/descqa/configs/srv_readiness_dp02.yaml b/descqa/configs/srv_readiness_dp02.yaml new file mode 100644 index 00000000..2279e352 --- /dev/null +++ b/descqa/configs/srv_readiness_dp02.yaml @@ -0,0 +1,56 @@ +subclass_name: readiness_test_synchronous.CheckQuantities +description: 'Plot histograms of listed quantities and perform range, finiteness, mean and standard deviation checks.' 
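+# Convention assumed for the checks below (the authoritative definition is
+# readiness_test_synchronous.CheckQuantities): a two-element `min`/`max` such as
+# `min: [-2.51, -2.49]` asks that the observed minimum of the quantity fall inside
+# that window, with `null` leaving one side unbounded, while `f_inf: 0` requires
+# that no non-finite values be present.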
+included_by_default: true + +subset_size: 1000000 + +quantities_to_check: + - quantities: ['ra', 'dec'] + kind: 'double' + label: 'deg' + min: [-2.51, -2.49] + max: [2.49, 2.51] + f_inf: 0 + + - quantities: ['cModelFlux_i','cModelFlux_r','cModelFlux_g'] + kind: 'double' + label: 'cModelFlux (i,r,g)' + min: [-0.2, 0] + max: [0, 0.2] + f_inf: 0 + log: true + + - quantities: ['psFlux_i', 'psFlux_r','psFlux_g'] + kind: 'double' + label: 'psFlux (i,r,g)' + min: [0, 0.001] + max: [179.99, 180] + f_inf: 0 + log: true + + - quantities: ['mag_i','mag_r','mag_g'] + kind: 'double' + label: 'mag_x (x=i,r,g)' + min: [null, 15] + max: [25, null] + f_inf: 0 + + - quantities: 'deblend_nChild' + kind: 'int' + label: '# blended objects' + + - quantities: 'refFwhm' + kind: 'double' + label: 'FWHM of ref band' + + - quantities: ['shear_1', 'shear_2'] + kind: 'double' + label: 'shear_12' + min: [null, -0.1] + max: [0.1, null] + f_inf: 0 + +catalog_filters: + + - filters: ['extendedness==1', 'sky_object==False', 'shape_flag==False','detect_isPrimary'] + diff --git a/descqa/configs/srv_shear.yaml b/descqa/configs/srv_shear.yaml index 59ec637f..f3b7c9df 100644 --- a/descqa/configs/srv_shear.yaml +++ b/descqa/configs/srv_shear.yaml @@ -1,4 +1,4 @@ -subclass_name: srv_s1s2.CheckEllipticity +subclass_name: srv_shear.CheckEllipticity description: 'Check ellipticity values and moments' included_by_default: true diff --git a/descqa/configs/srv_shear_dp02.yaml b/descqa/configs/srv_shear_dp02.yaml new file mode 100644 index 00000000..2a2479ee --- /dev/null +++ b/descqa/configs/srv_shear_dp02.yaml @@ -0,0 +1,17 @@ +subclass_name: srv_shear.CheckEllipticity +description: 'Check ellipticity values and moments' +included_by_default: true + +ra: 'ra' +dec: 'dec' +Ixx: 'Ixx_pixel' +Iyy: 'Iyy_pixel' +Ixy: 'Ixy_pixel' +IxxPSF: 'IxxPSF_pixel' +IyyPSF: 'IyyPSF_pixel' +IxyPSF: 'IxyPSF_pixel' +psf_fwhm: 'psf_fwhm' +bands: ['r','i','g','z','y'] + +catalog_filters: + - filters: ['extendedness==1','detect_isPrimary'] diff --git a/descqa/parallel.py b/descqa/parallel.py index 9f29cbd0..e72b244c 100644 --- a/descqa/parallel.py +++ b/descqa/parallel.py @@ -40,6 +40,8 @@ def send_to_master(value, kind): recvbuf = np.zeros(tot_num) elif kind=='bool': recvbuf = np.zeros(tot_num)!=0.0 + elif kind=='int': + recvbuf = np.zeros(tot_num).astype(int) else: raise NotImplementedError else: diff --git a/descqa/readiness_test_synchronous.py b/descqa/readiness_test_synchronous.py index b250187e..c3ab93ee 100644 --- a/descqa/readiness_test_synchronous.py +++ b/descqa/readiness_test_synchronous.py @@ -2,7 +2,7 @@ import os import re import fnmatch -from itertools import cycle +from itertools import cycle, chain from collections import defaultdict, OrderedDict import numpy as np import numexpr as ne @@ -48,6 +48,7 @@ def find_outlier(x,subset_size): x_small = np.random.choice(x,size=subset_size) l, m, h = np.percentile(x_small, norm.cdf([-1, 0, 1])*100) else: + x_small = x l, m, h = np.percentile(x_small, norm.cdf([-1, 0, 1])*100) d = (h-l) * 0.5 return np.sum((x > (m + d*3)) | (x < (m - d*3))) @@ -155,9 +156,9 @@ def __init__(self, **kwargs): if not all(d.get('quantity') for d in self.uniqueness_to_check): raise ValueError('yaml file error: `quantity` must exist for each item in `uniqueness_to_check`') - if self.catalog_filters: - if not all(d.get('quantity') for d in self.catalog_filters): - raise ValueError('yaml file error: `quantity` must exist for each item in `catalog_filters`') + #if self.catalog_filters: + # if not all(d.get('quantity') for d in 
self.catalog_filters): + # raise ValueError('yaml file error: `quantity` must exist for each item in `catalog_filters`') if not all(d.get('min') for d in self.catalog_filters) or all(d.get('max') for d in self.catalog_filters): raise ValueError('yaml file error: `min` or `max` must exist for each item in `catalog_filters`') @@ -266,8 +267,14 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): catalog_name = catalog_name.partition("_")[0] version = getattr(catalog_instance, 'version', '') if not self.no_version else '' + filters=[] + for i, filt in enumerate(self.catalog_filters): + filters.append(filt['filters']) + filters = list(chain(*filters)) + + print(filters) # check filters - filters = [] + """filters = [] filter_labels = '' for d in self.catalog_filters: if rank==0: @@ -288,7 +295,7 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): else: self.record_result('Found no matching quantity for filtering on {}'.format(fq), failed=True) continue - + """ lgnd_loc_dflt ='best' diff --git a/descqa/srv_analysistools.py b/descqa/srv_analysistools.py new file mode 100644 index 00000000..93b97485 --- /dev/null +++ b/descqa/srv_analysistools.py @@ -0,0 +1,259 @@ +from __future__ import print_function, division, unicode_literals, absolute_import +import os +import re +import fnmatch +from itertools import cycle +from collections import defaultdict, OrderedDict +import numpy as np +import numexpr as ne +from scipy.stats import norm, binned_statistic +from mpi4py import MPI +import time + +from .base import BaseValidationTest, TestResult +from .plotting import plt +from .parallel import send_to_master + +#import lsst +#import lsst.analysis +import lsst.analysis.tools + +from lsst.analysis.tools.actions.scalar import MedianAction +from lsst.analysis.tools.actions.vector import SnSelector, MagColumnNanoJansky +from lsst.analysis.tools.interfaces import AnalysisMetric + +comm = MPI.COMM_WORLD +size = comm.Get_size() +rank = comm.Get_rank() + + +__all__ = ['TestMetric','DemoMetric'] + + +class DemoMetric(AnalysisMetric): + def setDefaults(self): + super().setDefaults() + + # select on high signal to noise obejcts + # add in a signal to noise selector + + + #self.prep.selectors.snSelector = SnSelector() + + # set what key the selector should use when deciding SNR + #self.prep.selectors.snSelector.fluxType = "psfFlux" + + # select what threshold value is desireable for the selector + #self.prep.selectors.snSelector.threshold = 100 + + # the final name in the qualification is used as a key to insert + # the calculation into KeyedData + self.process.buildActions.mags = MagColumnNanoJansky(vectorKey="psfFlux") + self.process.calculateActions.medianValueName = MedianAction(vectorKey="mags") + + # tell the metic what the units are for the quantity + #self.produce.units = {"medianValueName": "Jy"} + self.produce.units = {"medianValueName": "mag"} + + # Rename the quanity prior to producing the Metric + # (useful for resuable workflows that set a name toward the end of computation) + self.produce.newNames = {"medianValueName": "DemoMetric"} + + +class TestMetric(BaseValidationTest): + """ + Check flux values and magnitudes + """ + + def __init__(self, **kwargs): + self.catalog_filters = kwargs.get('catalog_filters', []) + self.lgndtitle_fontsize = kwargs.get('lgndtitle_fontsize', 12) + self.truncate_cat_name = kwargs.get('truncate_cat_name', False) + self.no_version = kwargs.get('no_version', False) + self.title_size = kwargs.get('title_size', 'small') + 
self.font_size = kwargs.get('font_size', 12) + self.legend_size = kwargs.get('legend_size', 'x-small') + self.ra = kwargs.get('ra') + self.dec = kwargs.get('dec') + self.compare = kwargs.get('compare',False) + self.models = kwargs.get('models') + self.bands = kwargs.get('bands') + self.mag_lim = kwargs.get('mag_lim') + self.snr_lim = kwargs.get('snr_lim',0) + + + if not any(( + self.catalog_filters, + )): + raise ValueError('you need to specify catalog_filters for these checks, add a good flag if unsure') + + self.enable_individual_summary = bool(kwargs.get('enable_individual_summary', True)) + self.enable_aggregated_summary = bool(kwargs.get('enable_aggregated_summary', False)) + self.always_show_plot = bool(kwargs.get('always_show_plot', True)) + + self.nbins = int(kwargs.get('nbins', 20)) + self.prop_cycle = None + + self.current_catalog_name = None + self.current_failed_count = None + self._aggregated_header = list() + self._aggregated_table = list() + self._individual_header = list() + self._individual_table = list() + + super(TestMetric, self).__init__(**kwargs) + + def record_result(self, results, quantity_name=None, more_info=None, failed=None, individual_only=False): + if isinstance(results, dict): + self.current_failed_count += sum(1 for v in results.values() if v[1] == 'fail') + elif failed: + self.current_failed_count += 1 + + if self.enable_individual_summary: + if quantity_name is None: + self._individual_header.append(self.format_result_header(results, failed)) + else: + self._individual_table.append(self.format_result_row(results, quantity_name, more_info)) + + if self.enable_aggregated_summary and not individual_only: + if quantity_name is None: + results = '{} {}'.format(self.current_catalog_name, results) if self.current_catalog_name else results + self._aggregated_header.append(self.format_result_header(results, failed)) + else: + quantity_name = '{} {}'.format(self.current_catalog_name, quantity_name) if self.current_catalog_name else quantity_name + self._aggregated_table.append(self.format_result_row(results, quantity_name, more_info)) + + def format_result_row(self, results, quantity_name, more_info): + more_info = 'title="{}"'.format(more_info) if more_info else '' + output = ['', '{0}'.format(quantity_name, more_info)] + output.append('') + return ''.join(output) + + @staticmethod + def format_result_header(results, failed=False): + return '{0}'.format(results, 'class="fail"' if failed else '') + + + + def generate_summary(self, output_dir, aggregated=False): + if aggregated: + if not self.enable_aggregated_summary: + return + header = self._aggregated_header + table = self._aggregated_table + else: + if not self.enable_individual_summary: + return + header = self._individual_header + table = self._individual_table + + with open(os.path.join(output_dir, 'SUMMARY.html'), 'w') as f: + f.write('\n') + + f.write('
\n') + + f.write('\n') + f.write('\n') + for line in table: + f.write(line) + f.write('\n') + f.write('
Quantity
\n') + + if not aggregated: + self._individual_header.clear() + self._individual_table.clear() + + def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): + + all_quantities = sorted(map(str, catalog_instance.list_all_quantities(True))) + + self.prop_cycle = cycle(iter(plt.rcParams['axes.prop_cycle'])) + self.current_catalog_name = catalog_name + self.current_failed_count = 0 + galaxy_count = None + quantity_hashes = defaultdict(set) + + if rank==0: + self.record_result('Running flux and magnitude test on {} {}'.format( + catalog_name, + getattr(catalog_instance, 'version', ''), + individual_only=True, + )) + + if self.truncate_cat_name: + catalog_name = catalog_name.partition("_")[0] + version = getattr(catalog_instance, 'version', '') if not self.no_version else '' + + # create filter labels + filters=[] + for i, filt in enumerate(self.catalog_filters): + filters = filt['filters'] + + print(filters) + lgnd_loc_dflt ='best' + + label_tot=[] + plots_tot=[] + + # doing everything together this time so we can combine flags + quantities = [] + print(type(self.models)) + print(type(self.bands)) + print(self.models[0]) + for band in self.bands: + print(band) + for model in self.models: + print(model) + quantities.append(model+'Flux_'+band); quantities.append(model+'FluxErr_'+band); quantities.append(model+'Flux_flag_'+band) #fluxes + #quantities.append('mag_'+band + '_'+model); quantities.append('magerr_'+band+'_'+model); quantities.append('snr_'+band+'_'+model); #mags + quantities = tuple(quantities) + # note that snr is defined on flux directly and not on magnitudes + + # reading in the data + if len(filters) > 0: + catalog_data = catalog_instance.get_quantities(quantities,filters=filters,return_iterator=False, rank=rank, size=size) + else: + catalog_data = catalog_instance.get_quantities(quantities,return_iterator=False, rank=rank, size=size) + a = time.time() + + + data_rank={} + recvbuf={} + for quantity in quantities: + data_rank[quantity] = catalog_data[quantity] + print(len(data_rank[quantity])) + if 'flag' in quantity: + recvbuf[quantity] = send_to_master(data_rank[quantity],'bool') + else: + recvbuf[quantity] = send_to_master(data_rank[quantity],'double') + + if rank==0: + print(len(recvbuf[quantity])) + + + if rank==0: + fluxes = recvbuf['cModelFlux_r'] + fluxerr = recvbuf['cModelFluxErr_r'] + data = {"psfFlux": fluxes, "psfFluxErr": fluxerr} + self.metric = DemoMetric()(data)['DemoMetric'].quantity.value + + if rank==0: + self.generate_summary(output_dir) + else: + self.current_failed_count=0 + + + + self.current_failed_count = comm.bcast(self.current_failed_count, root=0) + self.metric = comm.bcast(self.metric, root=0) + #self.current_failed_count = metric['DemoMetric'].quantity.value + + return TestResult(passed=(self.current_failed_count == 0), score=self.metric) + + def conclude_test(self, output_dir): + self.generate_summary(output_dir, aggregated=True) diff --git a/descqa/srv_colortest.py b/descqa/srv_colortest.py index d25e030b..4a65a862 100644 --- a/descqa/srv_colortest.py +++ b/descqa/srv_colortest.py @@ -6,13 +6,13 @@ from collections import defaultdict, OrderedDict import numpy as np import numexpr as ne -from scipy.stats import norm +from scipy.stats import norm, binned_statistic from mpi4py import MPI -import healpy as hp import time from .base import BaseValidationTest, TestResult from .plotting import plt +from .parallel import send_to_master comm = MPI.COMM_WORLD size = comm.Get_size() @@ -22,9 +22,10 @@ __all__ = ['CheckFluxes'] + class 
CheckFluxes(BaseValidationTest): """ - Check of number of galaxies given flags and filters + Check flux values and magnitudes """ def __init__(self, **kwargs): @@ -33,27 +34,27 @@ def __init__(self, **kwargs): self.truncate_cat_name = kwargs.get('truncate_cat_name', False) self.no_version = kwargs.get('no_version', False) self.title_size = kwargs.get('title_size', 'small') - self.font_size = kwargs.get('font_size', 12) + self.font_size = kwargs.get('font_size', 12) self.legend_size = kwargs.get('legend_size', 'x-small') self.ra = kwargs.get('ra') self.dec = kwargs.get('dec') + self.compare = kwargs.get('compare',False) + self.models = kwargs.get('models') + self.bands = kwargs.get('bands') + self.mag_lim = kwargs.get('mag_lim') + self.snr_lim = kwargs.get('snr_lim',0) - self.cmodel = kwargs.get('cmodel') - self.cmodel_err = kwargs.get('cmodel_err') - self.cmodel_flag = kwargs.get('cmodel_flag') - self.bands = kwargs.get('bands',[]) if not any(( - self.bands + self.catalog_filters, )): - raise ValueError('must specify bands') - + raise ValueError('you need to specify catalog_filters for these checks, add a good flag if unsure') self.enable_individual_summary = bool(kwargs.get('enable_individual_summary', True)) self.enable_aggregated_summary = bool(kwargs.get('enable_aggregated_summary', False)) self.always_show_plot = bool(kwargs.get('always_show_plot', True)) - self.nbins = int(kwargs.get('nbins', 50)) + self.nbins = int(kwargs.get('nbins', 20)) self.prop_cycle = None self.current_catalog_name = None @@ -95,6 +96,185 @@ def format_result_row(self, results, quantity_name, more_info): def format_result_header(results, failed=False): return '{0}'.format(results, 'class="fail"' if failed else '') + + def plot_mag_perband(self, mags, output_dir): + ''' make per band and per model plots ''' + # make flags on all bands and with flags + masks = {} + for model in self.models: + mask_init = mags['mag_r_'+model]<=self.mag_lim + for band in self.bands: + mask_init = mask_init&(mags['mag_'+band+'_'+model]<=self.mag_lim) + #mask_init = mask_init&(mags[model+'Flux_flag_'+band]==False) + mask_init = mask_init&(mags['snr_'+band+'_'+model]>=self.snr_lim) + masks[model] = mask_init + + for band in self.bands: + plt.figure() + plt.title('mag_'+band) + for model in self.models: + mask_band = mags['mag_'+band+'_'+model]=self.snr_lim) + mag_vals = mags['mag_'+band+'_'+model][mask_band]#[masks[model]] + plt.hist(mag_vals,bins=np.linspace(22,self.mag_lim,100),alpha=0.5,label=model) + plt.legend() + plt.xlabel('mag_'+band) + plt.savefig(os.path.join(output_dir,'mag_'+band+'.png')) + plt.close() + + return + + + + def plot_color_permodel(self,mags,output_dir): + ''' ''' + + # color plots for each model + plt.figure() + for model in self.models: + mask_init = (mags['mag_r_'+model]<=self.mag_lim)&(mags['mag_g_'+model]<=self.mag_lim) + g_min_r = mags['mag_g_'+model][mask_init] - mags['mag_r_'+model][mask_init] + plt.hist(g_min_r, bins = np.linspace(-2,2,100),label=model, alpha=0.5) + plt.legend() + plt.xlabel('g_min_r') + plt.savefig(os.path.join(output_dir,'gmr.png')) + plt.close() + + # color-color plots + for model in self.models: + plt.figure() + mask_init = (mags['mag_r_'+model]<=self.mag_lim)&(mags['mag_g_'+model]<=self.mag_lim) + mask_init = (mags['mag_i_'+model]<=self.mag_lim) + g_min_r = mags['mag_g_'+model][mask_init] - mags['mag_r_'+model][mask_init] + r_min_i = mags['mag_r_'+model][mask_init] - mags['mag_i_'+model][mask_init] + plt.hist2d(g_min_r, r_min_i, bins = np.linspace(-0.5,1.5,100)) + 
plt.title(model) + plt.xlabel('g_min_r') + plt.ylabel('r_min_i') + plt.savefig(os.path.join(output_dir,'gmr_rmini_'+model+'.png')) + plt.close() + + # comparison plots for color + model1 = self.models[0] + mask1 = (mags['mag_r_'+model1]<=self.mag_lim)&(mags['mag_g_'+model1]<=self.mag_lim) + for model in self.models[1:]: + mask2 = (mags['mag_r_'+model]<=self.mag_lim)&(mags['mag_g_'+model]<=self.mag_lim) + mask_tot = mask1&mask2 + g_min_r1 = mags['mag_g_'+model1][mask_tot] - mags['mag_r_'+model1][mask_tot] + g_min_r2 = mags['mag_g_'+model][mask_tot] - mags['mag_r_'+model][mask_tot] + plt.figure() + plt.scatter(g_min_r1,g_min_r2-g_min_r1,s=0.01) + a1,b1,c1 = binned_statistic(g_min_r1,g_min_r2-g_min_r1,bins= np.linspace(-0.5,1.5,100),statistic='median') + plt.plot((b1[1:]+b1[:-1])/2.,a1,'r--') + plt.plot((b1[1:]+b1[:-1])/2.,np.zeros_like(a1),'k') + plt.xlabel(model1) + plt.ylabel(model+'-'+model1) + plt.xlim([-0.5,1.5]) + plt.ylim([-2,2]) + plt.savefig(os.path.join(output_dir,'gmr'+model1+'_'+model+'.png')) + plt.close() + + # comparison plots for mag + model1 = self.models[0] + for band in self.bands: + mask1 = (mags['mag_'+band+'_'+model1]<=self.mag_lim)&(mags['snr_'+band+'_'+model1]>=self.snr_lim) + #mask_init = mask_init&(mags[model+'Flux_flag_'+band]==False) + for model in self.models[1:]: + mask2 = (mags['mag_'+band+'_'+model]<=self.mag_lim)&(mags['snr_'+band+'_'+model]>=self.snr_lim) + mask_tot = mask1&mask2 + g_min_r1 = mags['mag_'+band+'_'+model1][mask_tot] + g_min_r2 = mags['mag_'+band+'_'+model][mask_tot] + plt.figure() + plt.scatter(g_min_r1,g_min_r2-g_min_r1,s=0.01) + a1,b1,c1 = binned_statistic(g_min_r1,g_min_r2-g_min_r1,bins= np.linspace(18,self.mag_lim,100),statistic='median') + plt.plot((b1[1:]+b1[:-1])/2.,a1,'r--') + plt.plot((b1[1:]+b1[:-1])/2.,np.zeros_like(a1),'k') + plt.ylim([-1.5,1.5]) + plt.xlim([18,self.mag_lim]) + plt.xlabel(model1) + plt.ylabel(model +' - '+ model1) + plt.title('mag_'+band) + plt.savefig(os.path.join(output_dir,'mag_'+band+'_'+model1+'_'+model+'_scatter.png')) + plt.close() + + return + + ''' + + def compare_colors(mags,model1,model2): + return + + def compare_mags(mags,model1,model2): + return + + def compare_fluxes(mags,model1,model2): + return ''' + + + + """def plot_moments_band(self, Ixx,Ixy,Iyy,band,output_dir): + ''' + Plot moments for each band + ''' + plt.figure() + bins = np.logspace(-1.,1.5,100) + bins_mid = (bins[1:]+bins[:-1])/2. + Ixx_out, bin_edges = np.histogram(Ixx, bins=bins) + Ixy_out, bin_edges = np.histogram(Ixy, bins=bins) + Iyy_out, bin_edges = np.histogram(Iyy, bins=bins) + self.record_result((0,'moments_'+band),'moments_'+band,'moments_'+band+'.png') + plt.plot(bins_mid,Ixx_out,'b',label='Ixx') + plt.plot(bins_mid,Iyy_out,'r--',label='Iyy') + plt.plot(bins_mid,Ixy_out,'k-.',label='Ixy') + plt.yscale('log') + plt.xscale('log') + plt.title(band) + plt.legend() + plt.savefig(os.path.join(output_dir, 'moments_'+band+'.png')) + plt.close() + return + + def plot_ellipticities_band(self,e1,e2,band,output_dir): + ''' + Plot elliptiticies for each band + ''' + plt.figure() + bins = np.linspace(0.,1.,51) + bins_mid = (bins[1:]+bins[:-1])/2. 
+ e1_out, bin_edges = np.histogram(e1, bins=bins) + e2_out, bin_edges = np.histogram(e2, bins=bins) + self.record_result((0,'ell_'+band),'ell_'+band,'ell_'+band+'.png') + plt.plot(bins_mid,e1_out,'b',label='e1') + plt.plot(bins_mid,e2_out,'r--',label='e2') + plt.yscale('log') + #plt.xscale('log') + plt.title(band) + plt.legend() + plt.savefig(os.path.join(output_dir, 'ell_'+band+'.png')) + plt.close() + return + + def plot_psf(self,fwhm,band,output_dir): + ''' + Plot elliptiticies for each band + ''' + plt.figure() + bins = np.linspace(0.,1.5,201) + bins_mid = (bins[1:]+bins[:-1])/2. + fwhm_out, bin_edges = np.histogram(fwhm, bins=bins) + self.record_result((0,'psf_'+band),'psf_'+band,'psf_'+band+'.png') + plt.plot(bins_mid,fwhm_out,'b',label='psf') + plt.yscale('log') + plt.xlim([0.5,1.4]) + plt.title(band) + plt.legend() + plt.savefig(os.path.join(output_dir, 'psf_'+band+'.png')) + plt.close() + return""" + + + + def generate_summary(self, output_dir, aggregated=False): if aggregated: if not self.enable_aggregated_summary: @@ -139,7 +319,7 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): quantity_hashes = defaultdict(set) if rank==0: - self.record_result('Running galaxy number test on {} {}'.format( + self.record_result('Running flux and magnitude test on {} {}'.format( catalog_name, getattr(catalog_instance, 'version', ''), individual_only=True, @@ -153,130 +333,132 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): filters=[] for i, filt in enumerate(self.catalog_filters): filters = filt['filters'] - for i, band in enumerate(self.bands): - bands = band['bands'] print(filters) lgnd_loc_dflt ='best' - - flags_tot =[] label_tot=[] plots_tot=[] - quantities_new = [] - - for band in bands: - quantity = self.cmodel+band - quantity_err = self.cmodel_err + band - quantities_new.append(quantity) - quantities_new.append(quantity_err) - - quantities_new = tuple(quantities_new) + # doing everything together this time so we can combine flags + quantities = [] + print(type(self.models)) + print(type(self.bands)) + print(self.models[0]) + for band in self.bands: + print(band) + for model in self.models: + print(model) + quantities.append(model+'Flux_'+band); quantities.append(model+'FluxErr_'+band); quantities.append(model+'Flux_flag_'+band) #fluxes + quantities.append('mag_'+band + '_'+model); quantities.append('magerr_'+band+'_'+model); quantities.append('snr_'+band+'_'+model); #mags + quantities = tuple(quantities) + # note that snr is defined on flux directly and not on magnitudes # reading in the data if len(filters) > 0: - catalog_data = catalog_instance.get_quantities(quantities_this_new,filters=filters,return_iterator=False, rank=rank, size=size) + catalog_data = catalog_instance.get_quantities(quantities,filters=filters,return_iterator=False, rank=rank, size=size) else: - catalog_data = catalog_instance.get_quantities(quantities_this_new,return_iterator=False, rank=rank, size=size) + catalog_data = catalog_instance.get_quantities(quantities,return_iterator=False, rank=rank, size=size) a = time.time() + + data_rank={} + recvbuf={} + for quantity in quantities: + data_rank[quantity] = catalog_data[quantity] + print(len(data_rank[quantity])) + if 'flag' in quantity: + recvbuf[quantity] = send_to_master(data_rank[quantity],'bool') + else: + recvbuf[quantity] = send_to_master(data_rank[quantity],'double') - for i, checks in enumerate(self.flags_to_check): - #quantities_this = checks['quantities'] - kind = checks['kind'] - flag_val = 
checks['flag_val'] - quantities_this = flags_tot[i] - - # for quantities_this in flags_tot: - fig = None; ax=None; - if rank==0: - fig, ax = plt.subplots() - has_plot = False - - for quantity in quantities_this: - #PL : only currently works for doubles - value = catalog_data[quantity] - count = len(value) - tot_num = comm.reduce(count) - counts = comm.allgather(count) - if rank==0: - if kind=='double': - recvbuf = np.zeros(tot_num) - elif kind=='bool': - recvbuf = np.zeros(tot_num)!=0.0 - else: - recvbuf = None - displs = np.array([sum(counts[:p]) for p in range(size)]) - if kind=='double': - comm.Gatherv([value,MPI.DOUBLE], [recvbuf,counts,displs,MPI.DOUBLE],root=0) - elif kind=='float': - comm.Gatherv([value,MPI.FLOAT], [recvbuf,counts,displs,MPI.FLOAT],root=0) - elif kind=='int': - comm.Gatherv([value,MPI.INT], [recvbuf,counts,displs,MPI.INT],root=0) - elif kind=='bool': - comm.Gatherv([value,MPI.BOOL], [recvbuf,counts,displs,MPI.BOOL],root=0) - elif kind=='int64': - comm.Gatherv([value,MPI.INT64_T], [recvbuf, counts, displs, MPI.INT64_T],root=0) - else: - print("add proper exception catch here") - - need_plot = False - - if rank==0: - print(time.time()-a,'read data',rank) - - a = time.time() - - if rank==0: - galaxy_count = len(recvbuf) - self.record_result('Found {} entries in this catalog.'.format(galaxy_count)) - print(quantity) - print(flag_val) - print(recvbuf[0]) - print('ngals masked = ',np.sum(recvbuf),' ngals unmasked = ', len(recvbuf)) - - need_plot=True - - - print(time.time()-a,'found entries') - a = time.time() - - result_this_quantity = ( - np.sum(recvbuf), - ('fail' if len(recvbuf)>(np.sum(recvbuf)*2.0) else 'pass'), - ) - quantity_hashes[result_this_quantity[0]].add(quantity) - self.record_result( - result_this_quantity, - quantity , - plots_tot[i] - ) - - if (need_plot or self.always_show_plot) and rank==0: - # # PL: Changed - need to change this to a numpy function and then communicate it before plotting - xbins = np.linspace(np.min(recvbuf_ra),np.max(recvbuf_ra),50) - ybins = np.linspace(np.min(recvbuf_dec),np.max(recvbuf_dec),50) - area = (xbins[1]-xbins[0])*(ybins[1]-ybins[0])*(60.**2) #arcminutes - im = ax.hist2d(recvbuf_ra[recvbuf],recvbuf_dec[recvbuf], bins=(xbins,ybins),weights = 1./area*np.ones(len(recvbuf_ra[recvbuf])))#, label=quantity) - has_plot = True - b = time.time() - - if has_plot and (rank==0): - #ax.set_xlabel(('log ' if checks.get('log') else '') + quantity_group_label, size=self.font_size) - #ax.yaxis.set_ticklabels([]) - if checks.get('plot_min') is not None: #zero values fail otherwise - ax.set_xlim(left=checks.get('plot_min')) - if checks.get('plot_max') is not None: - ax.set_xlim(right=checks.get('plot_max')) - ax.set_title('{} {}'.format(catalog_name, version), fontsize=self.title_size) - fig.colorbar(im[3], ax=ax) - ax.colorbar=True - fig.tight_layout() - print(plots_tot[i]) - print("closing figure") - fig.savefig(os.path.join(output_dir, plots_tot[i])) - plt.close(fig) + if rank==0: + print(len(recvbuf[quantity])) + + self.plot_mag_perband(recvbuf, output_dir) + self.plot_color_permodel(recvbuf, output_dir) + #plot_col_perband() + #plot_flux_perband() + + # comparisons + # cmodel vs gaap + # cmodel vs calib + # gaap vs calib + # flag checks + + # colors vs mags + + + + + #e1,e2 = shear_from_moments(recvbuf[self.Ixx+'_'+band],recvbuf[self.Ixy+'_'+band],recvbuf[self.Iyy+'_'+band]) + #e1psf,e2psf = shear_from_moments(recvbuf[self.IxxPSF+'_'+band],recvbuf[self.IxyPSF+'_'+band],recvbuf[self.IyyPSF+'_'+band]) + + #Ixx = recvbuf[self.Ixx+'_'+band] 
+ #Iyy = recvbuf[self.Iyy+'_'+band] + #Ixy = recvbuf[self.Ixy+'_'+band] + #fwhm = recvbuf[self.psf_fwhm+'_'+band] + + + ##self.plot_moments_band(Ixx,Ixy,Iyy,band,output_dir) + #self.plot_ellipticities_band(e1,e2,band,output_dir) + #self.plot_psf(fwhm,band,output_dir) + + + # plot moments directly per filter. For good, star, galaxy + # FWHM of the psf + # calculate ellpiticities and make sure they're alright + # look at different bands + # note that we want to look by magnitude or SNR to understand the longer tail in moments + # PSF ellipticity whisker plot? + # look at what validate_drp is + + # s1/s2 plots + + # look at full ellipticity distribution test as well + #https://github.com/LSSTDESC/descqa/blob/master/descqa/EllipticityDistribution.py + #DC2 validation github - PSF ellipticity + # https://github.com/LSSTDESC/DC2-analysis/blob/master/validation/Run_1.2p_PSF_tests.ipynb + + #https://github.com/LSSTDESC/DC2-analysis/blob/master/validation/DC2_calexp_src_validation_1p2.ipynb + + # Look at notes here: + #https://github.com/LSSTDESC/DC2-production/issues/340 + + # get PSF FWHM directly from data, note comments on here: + # https://github.com/LSSTDESC/DC2-analysis/blob/u/wmwv/DR6_dask_refactor/validation/validate_dc2_run2.2i_object_table_dask.ipynb about focussing of the "telescope" + + + '''mask_finite = np.isfinite(e1)&np.isfinite(e2) + bs_out = bs(e1[mask_finite],values = e2[mask_finite],bins=100,statistic='mean') + plt.figure() + quantity_hashes[0].add('s1s2') + self.record_result((0,'s1s2'),'s1s2','p_s1s2.png') + plt.plot(bs_out[1][1:],bs_out[0]) + plt.savefig(os.path.join(output_dir, 'p_s1s2.png')) + plt.close() + + + plt.figure() + quantity_hashes[0].add('s1') + self.record_result((0,'s1'),'s1','p_s1.png') + #plt.hist(e1,bins=np.linspace(-1.,1.,100)) + plt.hist(e1psf,bins=100)#np.linspace(-1.,1.,100)) + plt.savefig(os.path.join(output_dir, 'p_s1.png')) + plt.close() + plt.figure() + quantity_hashes[0].add('s2') + self.record_result((0,'s2'),'s2','p_s2.png') + #plt.hist(e2,bins=np.linspace(-1.,1.,100)) + plt.hist(e2psf,bins=100)#np.linspace(-1.,1.,100)) + plt.savefig(os.path.join(output_dir, 'p_s2.png')) + plt.close()''' + '''plt.figure() + quantity_hashes[0].add('s12') + self.record_result((0,'s12'),'s12','p_s12.png') + plt.hist2d(e1,e2,bins=100) + plt.savefig(os.path.join(output_dir, 'p_s12.png')) + plt.close()''' if rank==0: self.generate_summary(output_dir) diff --git a/descqa/srv_ngals.py b/descqa/srv_ngals.py index f63c5156..5ed8c1d4 100644 --- a/descqa/srv_ngals.py +++ b/descqa/srv_ngals.py @@ -206,23 +206,38 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): kind = checks['kind'] flag_val = checks['flag_val'] quantities_this = flags_tot[i] + print(quantities_this) - fig = None; ax=None; - if rank==0: - fig, ax = plt.subplots() + fig = None; axs=None; + #if rank==0: + #fig, ax = plt.subplots() for quantity in quantities_this: #PL : only currently works for doubles and booleans + if quantity=='good' and rank==0: + print('good',catalog_data[quantity][0]) + print(flag_val) value = catalog_data[quantity] recvbuf = send_to_master(value, kind) - + if quantity=='good' and rank==0: + print('good2',recvbuf[0]) + if rank==0: - flag_val=False + fig, axs = plt.subplots(1,2,sharey=True,figsize=(12,5)) + if flag_val==False or flag_val=='False': + flag_val=False + else: + flag_val=True + result_this_quantity = {} galaxy_count = len(recvbuf) - recvbuf = np.logical_not(recvbuf) + if flag_val: + recvbuf = np.logical_not(recvbuf) - frac = 
np.sum(recvbuf)/(len(recvbuf)+0.0)*100. + if np.sum(recvbuf)>0: + frac = np.sum(recvbuf)/len(recvbuf)*100. + else: + frac = 0. xbins = np.linspace(np.min(recvbuf_ra),np.max(recvbuf_ra),50) ybins = np.linspace(np.min(recvbuf_dec),np.max(recvbuf_dec),50) @@ -230,7 +245,9 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): # area in square arcminutes - im = ax.hist2d(recvbuf_ra[recvbuf],recvbuf_dec[recvbuf], bins=(xbins,ybins),weights = 1./area*np.ones(len(recvbuf_ra[recvbuf]))) + im = axs[0].hist2d(recvbuf_ra[recvbuf],recvbuf_dec[recvbuf], bins=(xbins,ybins),weights = 1./area*np.ones(len(recvbuf_ra[recvbuf]))) + im2 = axs[1].hist2d(recvbuf_ra[np.logical_not(recvbuf)],recvbuf_dec[np.logical_not(recvbuf)], bins=(xbins,ybins),weights = 1./area*np.ones(len(recvbuf_ra[np.logical_not(recvbuf)]))) + result_this_quantity[0] = ( np.mean(im[0][im[0]>0]), @@ -253,9 +270,16 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): ax.set_xlim(left=checks.get('plot_min')) if checks.get('plot_max') is not None: ax.set_xlim(right=checks.get('plot_max')) - ax.set_title('{} {}'.format(catalog_name, version), fontsize=self.title_size) - fig.colorbar(im[3], ax=ax) - ax.colorbar=True + axs[0].set_title('True: {} on {}'.format(quantity, catalog_name), fontsize=self.title_size) + axs[1].set_title('False: {} on {}'.format(quantity, catalog_name), fontsize=self.title_size) + fig.colorbar(im2[3], ax=axs[1]) + axs[1].colorbar=True + fig.colorbar(im[3], ax = axs[0]) + axs[0].colorbar=True + axs[0].set_ylabel('dec (degrees)') + axs[0].set_xlabel('RA (degrees)') + axs[1].set_ylabel('dec (degrees)') + axs[1].set_xlabel('RA (degrees)') fig.tight_layout() fig.savefig(os.path.join(output_dir, plots_tot[i])) plt.close(fig) diff --git a/descqa/srv_quality.py b/descqa/srv_quality.py new file mode 100644 index 00000000..38fe2bc9 --- /dev/null +++ b/descqa/srv_quality.py @@ -0,0 +1,346 @@ +from __future__ import print_function, division, unicode_literals, absolute_import +import os +import re +import fnmatch +from itertools import cycle +from collections import defaultdict, OrderedDict +import numpy as np +import numexpr as ne +from scipy.stats import norm, binned_statistic +from mpi4py import MPI +import time + +from .base import BaseValidationTest, TestResult +from .plotting import plt +from .parallel import send_to_master + +comm = MPI.COMM_WORLD +size = comm.Get_size() +rank = comm.Get_rank() + + +__all__ = ['CheckShear','shear_from_moments'] + + + +def shear_from_moments(Ixx,Ixy,Iyy,kind='eps'): + ''' + Get shear components from second moments + ''' + if kind=='eps': + denom = Ixx + Iyy + 2.*np.sqrt(Ixx*Iyy - Ixy**2) + elif kind=='chi': + denom = Ixx + Iyy + return (Ixx-Iyy)/denom, 2*Ixy/denom + + +class CheckEllipticity(BaseValidationTest): + """ + Check ellipticity values and second moments + """ + + def __init__(self, **kwargs): + self.catalog_filters = kwargs.get('catalog_filters', []) + self.lgndtitle_fontsize = kwargs.get('lgndtitle_fontsize', 12) + self.truncate_cat_name = kwargs.get('truncate_cat_name', False) + self.no_version = kwargs.get('no_version', False) + self.title_size = kwargs.get('title_size', 'small') + self.font_size = kwargs.get('font_size', 12) + self.legend_size = kwargs.get('legend_size', 'x-small') + self.ra = kwargs.get('ra') + self.dec = kwargs.get('dec') + self.Ixx = kwargs.get('Ixx') + self.Ixy= kwargs.get('Ixy') + self.Iyy= kwargs.get('Iyy') + self.IxxPSF= kwargs.get('IxxPSF') + self.IxyPSF= kwargs.get('IxyPSF') + self.IyyPSF= 
kwargs.get('IyyPSF') + self.psf_fwhm = kwargs.get('psf_fwhm') + self.bands = kwargs.get('bands') + + if not any(( + self.catalog_filters, + )): + raise ValueError('you need to specify catalog_filters for these checks, add an extendedness flag, a good flag and a magnitude range') + + self.enable_individual_summary = bool(kwargs.get('enable_individual_summary', True)) + self.enable_aggregated_summary = bool(kwargs.get('enable_aggregated_summary', False)) + self.always_show_plot = bool(kwargs.get('always_show_plot', True)) + + self.nbins = int(kwargs.get('nbins', 50)) + self.prop_cycle = None + + self.current_catalog_name = None + self.current_failed_count = None + self._aggregated_header = list() + self._aggregated_table = list() + self._individual_header = list() + self._individual_table = list() + + super(CheckEllipticity, self).__init__(**kwargs) + + def record_result(self, results, quantity_name=None, more_info=None, failed=None, individual_only=False): + if isinstance(results, dict): + self.current_failed_count += sum(1 for v in results.values() if v[1] == 'fail') + elif failed: + self.current_failed_count += 1 + + if self.enable_individual_summary: + if quantity_name is None: + self._individual_header.append(self.format_result_header(results, failed)) + else: + self._individual_table.append(self.format_result_row(results, quantity_name, more_info)) + + if self.enable_aggregated_summary and not individual_only: + if quantity_name is None: + results = '{} {}'.format(self.current_catalog_name, results) if self.current_catalog_name else results + self._aggregated_header.append(self.format_result_header(results, failed)) + else: + quantity_name = '{} {}'.format(self.current_catalog_name, quantity_name) if self.current_catalog_name else quantity_name + self._aggregated_table.append(self.format_result_row(results, quantity_name, more_info)) + + def format_result_row(self, results, quantity_name, more_info): + more_info = 'title="{}"'.format(more_info) if more_info else '' + output = ['', '{0}'.format(quantity_name, more_info)] + output.append('') + return ''.join(output) + + @staticmethod + def format_result_header(results, failed=False): + return '{0}'.format(results, 'class="fail"' if failed else '') + + def plot_moments_band(self, Ixx,Ixy,Iyy,band,output_dir): + ''' + Plot moments for each band + ''' + plt.figure() + bins = np.logspace(-1.,1.5,100) + bins_mid = (bins[1:]+bins[:-1])/2. + Ixx_out, bin_edges = np.histogram(Ixx, bins=bins) + Ixy_out, bin_edges = np.histogram(Ixy, bins=bins) + Iyy_out, bin_edges = np.histogram(Iyy, bins=bins) + self.record_result((0,'moments_'+band),'moments_'+band,'moments_'+band+'.png') + plt.plot(bins_mid,Ixx_out,'b',label='Ixx') + plt.plot(bins_mid,Iyy_out,'r--',label='Iyy') + plt.plot(bins_mid,Ixy_out,'k-.',label='Ixy') + plt.yscale('log') + plt.xscale('log') + plt.title(band) + plt.legend() + plt.savefig(os.path.join(output_dir, 'moments_'+band+'.png')) + plt.close() + return + + def plot_ellipticities_band(self,e1,e2,band,output_dir): + ''' + Plot elliptiticies for each band + ''' + plt.figure() + bins = np.linspace(0.,1.,51) + bins_mid = (bins[1:]+bins[:-1])/2. 
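+        # bin centres; the e1/e2 histogram counts computed below are plotted against these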
+ e1_out, bin_edges = np.histogram(e1, bins=bins) + e2_out, bin_edges = np.histogram(e2, bins=bins) + self.record_result((0,'ell_'+band),'ell_'+band,'ell_'+band+'.png') + plt.plot(bins_mid,e1_out,'b',label='e1') + plt.plot(bins_mid,e2_out,'r--',label='e2') + plt.yscale('log') + #plt.xscale('log') + plt.title(band) + plt.legend() + plt.savefig(os.path.join(output_dir, 'ell_'+band+'.png')) + plt.close() + return + + def plot_psf(self,fwhm,band,output_dir): + ''' + Plot elliptiticies for each band + ''' + plt.figure() + bins = np.linspace(0.,1.5,201) + bins_mid = (bins[1:]+bins[:-1])/2. + fwhm_out, bin_edges = np.histogram(fwhm, bins=bins) + self.record_result((0,'psf_'+band),'psf_'+band,'psf_'+band+'.png') + plt.plot(bins_mid,fwhm_out,'b',label='psf') + plt.yscale('log') + plt.xlim([0.5,1.4]) + plt.title(band) + plt.legend() + plt.savefig(os.path.join(output_dir, 'psf_'+band+'.png')) + plt.close() + return + + + + + def generate_summary(self, output_dir, aggregated=False): + if aggregated: + if not self.enable_aggregated_summary: + return + header = self._aggregated_header + table = self._aggregated_table + else: + if not self.enable_individual_summary: + return + header = self._individual_header + table = self._individual_table + + with open(os.path.join(output_dir, 'SUMMARY.html'), 'w') as f: + f.write('\n') + + f.write('
\n') + + f.write('\n') + f.write('\n') + for line in table: + f.write(line) + f.write('\n') + f.write('
Quantity
\n') + + if not aggregated: + self._individual_header.clear() + self._individual_table.clear() + + def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): + + all_quantities = sorted(map(str, catalog_instance.list_all_quantities(True))) + + self.prop_cycle = cycle(iter(plt.rcParams['axes.prop_cycle'])) + self.current_catalog_name = catalog_name + self.current_failed_count = 0 + galaxy_count = None + quantity_hashes = defaultdict(set) + + if rank==0: + self.record_result('Running galaxy number test on {} {}'.format( + catalog_name, + getattr(catalog_instance, 'version', ''), + individual_only=True, + )) + + if self.truncate_cat_name: + catalog_name = catalog_name.partition("_")[0] + version = getattr(catalog_instance, 'version', '') if not self.no_version else '' + + # create filter labels + filters=[] + for i, filt in enumerate(self.catalog_filters): + filters = filt['filters'] + + print(filters) + lgnd_loc_dflt ='best' + + + label_tot=[] + plots_tot=[] + + #quantities=[self.ra,self.dec,self.Ixx,self.Iyy,self.Ixy,self.IxxPSF, self.IyyPSF, self.IxyPSF] + + # doing everything per-band first of all + for band in self.bands: + quantities=[self.Ixx+'_'+band,self.Iyy+'_'+band,self.Ixy+'_'+band,self.IxxPSF+'_'+band, self.IyyPSF+'_'+band, self.IxyPSF+'_'+band, self.psf_fwhm+'_'+band] + quantities = tuple(quantities) + + + + # reading in the data + if len(filters) > 0: + catalog_data = catalog_instance.get_quantities(quantities,filters=filters,return_iterator=False, rank=rank, size=size) + else: + catalog_data = catalog_instance.get_quantities(quantities,return_iterator=False, rank=rank, size=size) + a = time.time() + + + data_rank={} + recvbuf={} + for quantity in quantities: + data_rank[quantity] = catalog_data[quantity] + print(len(data_rank[quantity])) + recvbuf[quantity] = send_to_master(data_rank[quantity],'double') + + if rank==0: + print(len(recvbuf[quantity])) + e1,e2 = shear_from_moments(recvbuf[self.Ixx+'_'+band],recvbuf[self.Ixy+'_'+band],recvbuf[self.Iyy+'_'+band]) + e1psf,e2psf = shear_from_moments(recvbuf[self.IxxPSF+'_'+band],recvbuf[self.IxyPSF+'_'+band],recvbuf[self.IyyPSF+'_'+band]) + + Ixx = recvbuf[self.Ixx+'_'+band] + Iyy = recvbuf[self.Iyy+'_'+band] + Ixy = recvbuf[self.Ixy+'_'+band] + fwhm = recvbuf[self.psf_fwhm+'_'+band] + + + self.plot_moments_band(Ixx,Ixy,Iyy,band,output_dir) + self.plot_ellipticities_band(e1,e2,band,output_dir) + self.plot_psf(fwhm,band,output_dir) + + + # plot moments directly per filter. For good, star, galaxy + # FWHM of the psf + # calculate ellpiticities and make sure they're alright + # look at different bands + # note that we want to look by magnitude or SNR to understand the longer tail in moments + # PSF ellipticity whisker plot? 
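+                # A minimal sketch of that whisker plot, left commented out because it assumes
+                # ra/dec are also read into recvbuf for this test (they are not requested in
+                # `quantities` here yet); it would reuse the e1psf/e2psf computed above:
+                #   theta = 0.5*np.arctan2(e2psf, e1psf)   # PSF position angle
+                #   amp = np.hypot(e1psf, e2psf)           # whisker length ~ |e_PSF|
+                #   plt.figure()
+                #   plt.quiver(ra, dec, amp*np.cos(theta), amp*np.sin(theta),
+                #              headwidth=0, headlength=0, headaxislength=0, pivot='middle')
+                #   plt.savefig(os.path.join(output_dir, 'psf_whisker_'+band+'.png'))
+                #   plt.close()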
+ # look at what validate_drp is + + # s1/s2 plots + + # look at full ellipticity distribution test as well + #https://github.com/LSSTDESC/descqa/blob/master/descqa/EllipticityDistribution.py + #DC2 validation github - PSF ellipticity + # https://github.com/LSSTDESC/DC2-analysis/blob/master/validation/Run_1.2p_PSF_tests.ipynb + + #https://github.com/LSSTDESC/DC2-analysis/blob/master/validation/DC2_calexp_src_validation_1p2.ipynb + + # Look at notes here: + #https://github.com/LSSTDESC/DC2-production/issues/340 + + # get PSF FWHM directly from data, note comments on here: + # https://github.com/LSSTDESC/DC2-analysis/blob/u/wmwv/DR6_dask_refactor/validation/validate_dc2_run2.2i_object_table_dask.ipynb about focussing of the "telescope" + + + '''mask_finite = np.isfinite(e1)&np.isfinite(e2) + bs_out = bs(e1[mask_finite],values = e2[mask_finite],bins=100,statistic='mean') + plt.figure() + quantity_hashes[0].add('s1s2') + self.record_result((0,'s1s2'),'s1s2','p_s1s2.png') + plt.plot(bs_out[1][1:],bs_out[0]) + plt.savefig(os.path.join(output_dir, 'p_s1s2.png')) + plt.close() + + + plt.figure() + quantity_hashes[0].add('s1') + self.record_result((0,'s1'),'s1','p_s1.png') + #plt.hist(e1,bins=np.linspace(-1.,1.,100)) + plt.hist(e1psf,bins=100)#np.linspace(-1.,1.,100)) + plt.savefig(os.path.join(output_dir, 'p_s1.png')) + plt.close() + plt.figure() + quantity_hashes[0].add('s2') + self.record_result((0,'s2'),'s2','p_s2.png') + #plt.hist(e2,bins=np.linspace(-1.,1.,100)) + plt.hist(e2psf,bins=100)#np.linspace(-1.,1.,100)) + plt.savefig(os.path.join(output_dir, 'p_s2.png')) + plt.close()''' + '''plt.figure() + quantity_hashes[0].add('s12') + self.record_result((0,'s12'),'s12','p_s12.png') + plt.hist2d(e1,e2,bins=100) + plt.savefig(os.path.join(output_dir, 'p_s12.png')) + plt.close()''' + + if rank==0: + self.generate_summary(output_dir) + else: + self.current_failed_count=0 + + self.current_failed_count = comm.bcast(self.current_failed_count, root=0) + + return TestResult(passed=(self.current_failed_count == 0), score=self.current_failed_count) + + def conclude_test(self, output_dir): + self.generate_summary(output_dir, aggregated=True) diff --git a/descqa/srv_readiness_test.py b/descqa/srv_readiness_test.py index 62c6feb9..75066b81 100644 --- a/descqa/srv_readiness_test.py +++ b/descqa/srv_readiness_test.py @@ -264,7 +264,15 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): catalog_name = catalog_name.partition("_")[0] version = getattr(catalog_instance, 'version', '') if not self.no_version else '' + + filters=[] + for i, filt in enumerate(self.catalog_filters): + filters.append(filt['filters']) + filters = list(chain(*filters)) + + # check filters + """ filters = [] filter_labels = '' for d in self.catalog_filters: @@ -288,6 +296,7 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): continue print(filters, filter_labels) + """ lgnd_loc_dflt ='best' import time diff --git a/descqa/srv_shear.py b/descqa/srv_shear.py index 84d0567e..38fe2bc9 100644 --- a/descqa/srv_shear.py +++ b/descqa/srv_shear.py @@ -12,6 +12,7 @@ from .base import BaseValidationTest, TestResult from .plotting import plt +from .parallel import send_to_master comm = MPI.COMM_WORLD size = comm.Get_size() @@ -253,12 +254,16 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): catalog_data = catalog_instance.get_quantities(quantities,return_iterator=False, rank=rank, size=size) a = time.time() + data_rank={} recvbuf={} for quantity in quantities: 
data_rank[quantity] = catalog_data[quantity] + print(len(data_rank[quantity])) recvbuf[quantity] = send_to_master(data_rank[quantity],'double') + if rank==0: + print(len(recvbuf[quantity])) e1,e2 = shear_from_moments(recvbuf[self.Ixx+'_'+band],recvbuf[self.Ixy+'_'+band],recvbuf[self.Iyy+'_'+band]) e1psf,e2psf = shear_from_moments(recvbuf[self.IxxPSF+'_'+band],recvbuf[self.IxyPSF+'_'+band],recvbuf[self.IyyPSF+'_'+band]) diff --git a/descqa/srv_truth.py b/descqa/srv_truth.py new file mode 100644 index 00000000..38fe2bc9 --- /dev/null +++ b/descqa/srv_truth.py @@ -0,0 +1,346 @@ +from __future__ import print_function, division, unicode_literals, absolute_import +import os +import re +import fnmatch +from itertools import cycle +from collections import defaultdict, OrderedDict +import numpy as np +import numexpr as ne +from scipy.stats import norm, binned_statistic +from mpi4py import MPI +import time + +from .base import BaseValidationTest, TestResult +from .plotting import plt +from .parallel import send_to_master + +comm = MPI.COMM_WORLD +size = comm.Get_size() +rank = comm.Get_rank() + + +__all__ = ['CheckShear','shear_from_moments'] + + + +def shear_from_moments(Ixx,Ixy,Iyy,kind='eps'): + ''' + Get shear components from second moments + ''' + if kind=='eps': + denom = Ixx + Iyy + 2.*np.sqrt(Ixx*Iyy - Ixy**2) + elif kind=='chi': + denom = Ixx + Iyy + return (Ixx-Iyy)/denom, 2*Ixy/denom + + +class CheckEllipticity(BaseValidationTest): + """ + Check ellipticity values and second moments + """ + + def __init__(self, **kwargs): + self.catalog_filters = kwargs.get('catalog_filters', []) + self.lgndtitle_fontsize = kwargs.get('lgndtitle_fontsize', 12) + self.truncate_cat_name = kwargs.get('truncate_cat_name', False) + self.no_version = kwargs.get('no_version', False) + self.title_size = kwargs.get('title_size', 'small') + self.font_size = kwargs.get('font_size', 12) + self.legend_size = kwargs.get('legend_size', 'x-small') + self.ra = kwargs.get('ra') + self.dec = kwargs.get('dec') + self.Ixx = kwargs.get('Ixx') + self.Ixy= kwargs.get('Ixy') + self.Iyy= kwargs.get('Iyy') + self.IxxPSF= kwargs.get('IxxPSF') + self.IxyPSF= kwargs.get('IxyPSF') + self.IyyPSF= kwargs.get('IyyPSF') + self.psf_fwhm = kwargs.get('psf_fwhm') + self.bands = kwargs.get('bands') + + if not any(( + self.catalog_filters, + )): + raise ValueError('you need to specify catalog_filters for these checks, add an extendedness flag, a good flag and a magnitude range') + + self.enable_individual_summary = bool(kwargs.get('enable_individual_summary', True)) + self.enable_aggregated_summary = bool(kwargs.get('enable_aggregated_summary', False)) + self.always_show_plot = bool(kwargs.get('always_show_plot', True)) + + self.nbins = int(kwargs.get('nbins', 50)) + self.prop_cycle = None + + self.current_catalog_name = None + self.current_failed_count = None + self._aggregated_header = list() + self._aggregated_table = list() + self._individual_header = list() + self._individual_table = list() + + super(CheckEllipticity, self).__init__(**kwargs) + + def record_result(self, results, quantity_name=None, more_info=None, failed=None, individual_only=False): + if isinstance(results, dict): + self.current_failed_count += sum(1 for v in results.values() if v[1] == 'fail') + elif failed: + self.current_failed_count += 1 + + if self.enable_individual_summary: + if quantity_name is None: + self._individual_header.append(self.format_result_header(results, failed)) + else: + self._individual_table.append(self.format_result_row(results, quantity_name, 
more_info)) + + if self.enable_aggregated_summary and not individual_only: + if quantity_name is None: + results = '{} {}'.format(self.current_catalog_name, results) if self.current_catalog_name else results + self._aggregated_header.append(self.format_result_header(results, failed)) + else: + quantity_name = '{} {}'.format(self.current_catalog_name, quantity_name) if self.current_catalog_name else quantity_name + self._aggregated_table.append(self.format_result_row(results, quantity_name, more_info)) + + def format_result_row(self, results, quantity_name, more_info): + more_info = 'title="{}"'.format(more_info) if more_info else '' + output = ['', '{0}'.format(quantity_name, more_info)] + output.append('') + return ''.join(output) + + @staticmethod + def format_result_header(results, failed=False): + return '{0}'.format(results, 'class="fail"' if failed else '') + + def plot_moments_band(self, Ixx,Ixy,Iyy,band,output_dir): + ''' + Plot moments for each band + ''' + plt.figure() + bins = np.logspace(-1.,1.5,100) + bins_mid = (bins[1:]+bins[:-1])/2. + Ixx_out, bin_edges = np.histogram(Ixx, bins=bins) + Ixy_out, bin_edges = np.histogram(Ixy, bins=bins) + Iyy_out, bin_edges = np.histogram(Iyy, bins=bins) + self.record_result((0,'moments_'+band),'moments_'+band,'moments_'+band+'.png') + plt.plot(bins_mid,Ixx_out,'b',label='Ixx') + plt.plot(bins_mid,Iyy_out,'r--',label='Iyy') + plt.plot(bins_mid,Ixy_out,'k-.',label='Ixy') + plt.yscale('log') + plt.xscale('log') + plt.title(band) + plt.legend() + plt.savefig(os.path.join(output_dir, 'moments_'+band+'.png')) + plt.close() + return + + def plot_ellipticities_band(self,e1,e2,band,output_dir): + ''' + Plot elliptiticies for each band + ''' + plt.figure() + bins = np.linspace(0.,1.,51) + bins_mid = (bins[1:]+bins[:-1])/2. + e1_out, bin_edges = np.histogram(e1, bins=bins) + e2_out, bin_edges = np.histogram(e2, bins=bins) + self.record_result((0,'ell_'+band),'ell_'+band,'ell_'+band+'.png') + plt.plot(bins_mid,e1_out,'b',label='e1') + plt.plot(bins_mid,e2_out,'r--',label='e2') + plt.yscale('log') + #plt.xscale('log') + plt.title(band) + plt.legend() + plt.savefig(os.path.join(output_dir, 'ell_'+band+'.png')) + plt.close() + return + + def plot_psf(self,fwhm,band,output_dir): + ''' + Plot elliptiticies for each band + ''' + plt.figure() + bins = np.linspace(0.,1.5,201) + bins_mid = (bins[1:]+bins[:-1])/2. + fwhm_out, bin_edges = np.histogram(fwhm, bins=bins) + self.record_result((0,'psf_'+band),'psf_'+band,'psf_'+band+'.png') + plt.plot(bins_mid,fwhm_out,'b',label='psf') + plt.yscale('log') + plt.xlim([0.5,1.4]) + plt.title(band) + plt.legend() + plt.savefig(os.path.join(output_dir, 'psf_'+band+'.png')) + plt.close() + return + + + + + def generate_summary(self, output_dir, aggregated=False): + if aggregated: + if not self.enable_aggregated_summary: + return + header = self._aggregated_header + table = self._aggregated_table + else: + if not self.enable_individual_summary: + return + header = self._individual_header + table = self._individual_table + + with open(os.path.join(output_dir, 'SUMMARY.html'), 'w') as f: + f.write('\n') + + f.write('
<ul>\n') + + f.write('</ul><br>\n') + f.write('<table><thead><tr><td>Quantity</td></tr></thead><tbody>\n') + for line in table: + f.write(line) + f.write('</tbody></table>\n') + f.write('</body></html>
\n') + + if not aggregated: + self._individual_header.clear() + self._individual_table.clear() + + def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): + + all_quantities = sorted(map(str, catalog_instance.list_all_quantities(True))) + + self.prop_cycle = cycle(iter(plt.rcParams['axes.prop_cycle'])) + self.current_catalog_name = catalog_name + self.current_failed_count = 0 + galaxy_count = None + quantity_hashes = defaultdict(set) + + if rank==0: + self.record_result('Running galaxy number test on {} {}'.format( + catalog_name, + getattr(catalog_instance, 'version', ''), + individual_only=True, + )) + + if self.truncate_cat_name: + catalog_name = catalog_name.partition("_")[0] + version = getattr(catalog_instance, 'version', '') if not self.no_version else '' + + # create filter labels + filters=[] + for i, filt in enumerate(self.catalog_filters): + filters = filt['filters'] + + print(filters) + lgnd_loc_dflt ='best' + + + label_tot=[] + plots_tot=[] + + #quantities=[self.ra,self.dec,self.Ixx,self.Iyy,self.Ixy,self.IxxPSF, self.IyyPSF, self.IxyPSF] + + # doing everything per-band first of all + for band in self.bands: + quantities=[self.Ixx+'_'+band,self.Iyy+'_'+band,self.Ixy+'_'+band,self.IxxPSF+'_'+band, self.IyyPSF+'_'+band, self.IxyPSF+'_'+band, self.psf_fwhm+'_'+band] + quantities = tuple(quantities) + + + + # reading in the data + if len(filters) > 0: + catalog_data = catalog_instance.get_quantities(quantities,filters=filters,return_iterator=False, rank=rank, size=size) + else: + catalog_data = catalog_instance.get_quantities(quantities,return_iterator=False, rank=rank, size=size) + a = time.time() + + + data_rank={} + recvbuf={} + for quantity in quantities: + data_rank[quantity] = catalog_data[quantity] + print(len(data_rank[quantity])) + recvbuf[quantity] = send_to_master(data_rank[quantity],'double') + + if rank==0: + print(len(recvbuf[quantity])) + e1,e2 = shear_from_moments(recvbuf[self.Ixx+'_'+band],recvbuf[self.Ixy+'_'+band],recvbuf[self.Iyy+'_'+band]) + e1psf,e2psf = shear_from_moments(recvbuf[self.IxxPSF+'_'+band],recvbuf[self.IxyPSF+'_'+band],recvbuf[self.IyyPSF+'_'+band]) + + Ixx = recvbuf[self.Ixx+'_'+band] + Iyy = recvbuf[self.Iyy+'_'+band] + Ixy = recvbuf[self.Ixy+'_'+band] + fwhm = recvbuf[self.psf_fwhm+'_'+band] + + + self.plot_moments_band(Ixx,Ixy,Iyy,band,output_dir) + self.plot_ellipticities_band(e1,e2,band,output_dir) + self.plot_psf(fwhm,band,output_dir) + + + # plot moments directly per filter. For good, star, galaxy + # FWHM of the psf + # calculate ellpiticities and make sure they're alright + # look at different bands + # note that we want to look by magnitude or SNR to understand the longer tail in moments + # PSF ellipticity whisker plot? 
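As a quick sanity check on the moment-to-ellipticity convention used above (a standalone sketch, not part of the patch; it copies the 'eps' and 'chi' kinds from the shear_from_moments helper in srv_truth.py), note that an axis-aligned ellipse with semi-axes a and b has Ixx ~ a^2, Iyy ~ b^2 and Ixy = 0, so 'eps' should return (1-q)/(1+q) and 'chi' should return (1-q^2)/(1+q^2) with q = b/a:

import numpy as np

def shear_from_moments(Ixx, Ixy, Iyy, kind='eps'):
    # same convention as the helper defined in srv_truth.py above
    if kind == 'eps':
        denom = Ixx + Iyy + 2. * np.sqrt(Ixx * Iyy - Ixy**2)
    elif kind == 'chi':
        denom = Ixx + Iyy
    return (Ixx - Iyy) / denom, 2 * Ixy / denom

a, b = 2.0, 1.0                 # semi-major / semi-minor axes of a toy source
q = b / a                       # axis ratio
e1_eps, e2_eps = shear_from_moments(a**2, 0.0, b**2, kind='eps')
e1_chi, e2_chi = shear_from_moments(a**2, 0.0, b**2, kind='chi')
assert np.isclose(e1_eps, (1 - q) / (1 + q))        # 1/3 for q = 0.5
assert np.isclose(e1_chi, (1 - q**2) / (1 + q**2))  # 0.6 for q = 0.5
assert e2_eps == 0.0 and e2_chi == 0.0              # no cross term, so no e2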
+ # look at what validate_drp is + + # s1/s2 plots + + # look at full ellipticity distribution test as well + #https://github.com/LSSTDESC/descqa/blob/master/descqa/EllipticityDistribution.py + #DC2 validation github - PSF ellipticity + # https://github.com/LSSTDESC/DC2-analysis/blob/master/validation/Run_1.2p_PSF_tests.ipynb + + #https://github.com/LSSTDESC/DC2-analysis/blob/master/validation/DC2_calexp_src_validation_1p2.ipynb + + # Look at notes here: + #https://github.com/LSSTDESC/DC2-production/issues/340 + + # get PSF FWHM directly from data, note comments on here: + # https://github.com/LSSTDESC/DC2-analysis/blob/u/wmwv/DR6_dask_refactor/validation/validate_dc2_run2.2i_object_table_dask.ipynb about focussing of the "telescope" + + + '''mask_finite = np.isfinite(e1)&np.isfinite(e2) + bs_out = bs(e1[mask_finite],values = e2[mask_finite],bins=100,statistic='mean') + plt.figure() + quantity_hashes[0].add('s1s2') + self.record_result((0,'s1s2'),'s1s2','p_s1s2.png') + plt.plot(bs_out[1][1:],bs_out[0]) + plt.savefig(os.path.join(output_dir, 'p_s1s2.png')) + plt.close() + + + plt.figure() + quantity_hashes[0].add('s1') + self.record_result((0,'s1'),'s1','p_s1.png') + #plt.hist(e1,bins=np.linspace(-1.,1.,100)) + plt.hist(e1psf,bins=100)#np.linspace(-1.,1.,100)) + plt.savefig(os.path.join(output_dir, 'p_s1.png')) + plt.close() + plt.figure() + quantity_hashes[0].add('s2') + self.record_result((0,'s2'),'s2','p_s2.png') + #plt.hist(e2,bins=np.linspace(-1.,1.,100)) + plt.hist(e2psf,bins=100)#np.linspace(-1.,1.,100)) + plt.savefig(os.path.join(output_dir, 'p_s2.png')) + plt.close()''' + '''plt.figure() + quantity_hashes[0].add('s12') + self.record_result((0,'s12'),'s12','p_s12.png') + plt.hist2d(e1,e2,bins=100) + plt.savefig(os.path.join(output_dir, 'p_s12.png')) + plt.close()''' + + if rank==0: + self.generate_summary(output_dir) + else: + self.current_failed_count=0 + + self.current_failed_count = comm.bcast(self.current_failed_count, root=0) + + return TestResult(passed=(self.current_failed_count == 0), score=self.current_failed_count) + + def conclude_test(self, output_dir): + self.generate_summary(output_dir, aggregated=True) diff --git a/descqa/srv_txpipe.py b/descqa/srv_txpipe.py new file mode 100644 index 00000000..38fe2bc9 --- /dev/null +++ b/descqa/srv_txpipe.py @@ -0,0 +1,346 @@ +from __future__ import print_function, division, unicode_literals, absolute_import +import os +import re +import fnmatch +from itertools import cycle +from collections import defaultdict, OrderedDict +import numpy as np +import numexpr as ne +from scipy.stats import norm, binned_statistic +from mpi4py import MPI +import time + +from .base import BaseValidationTest, TestResult +from .plotting import plt +from .parallel import send_to_master + +comm = MPI.COMM_WORLD +size = comm.Get_size() +rank = comm.Get_rank() + + +__all__ = ['CheckShear','shear_from_moments'] + + + +def shear_from_moments(Ixx,Ixy,Iyy,kind='eps'): + ''' + Get shear components from second moments + ''' + if kind=='eps': + denom = Ixx + Iyy + 2.*np.sqrt(Ixx*Iyy - Ixy**2) + elif kind=='chi': + denom = Ixx + Iyy + return (Ixx-Iyy)/denom, 2*Ixy/denom + + +class CheckEllipticity(BaseValidationTest): + """ + Check ellipticity values and second moments + """ + + def __init__(self, **kwargs): + self.catalog_filters = kwargs.get('catalog_filters', []) + self.lgndtitle_fontsize = kwargs.get('lgndtitle_fontsize', 12) + self.truncate_cat_name = kwargs.get('truncate_cat_name', False) + self.no_version = kwargs.get('no_version', False) + self.title_size = 
kwargs.get('title_size', 'small') + self.font_size = kwargs.get('font_size', 12) + self.legend_size = kwargs.get('legend_size', 'x-small') + self.ra = kwargs.get('ra') + self.dec = kwargs.get('dec') + self.Ixx = kwargs.get('Ixx') + self.Ixy= kwargs.get('Ixy') + self.Iyy= kwargs.get('Iyy') + self.IxxPSF= kwargs.get('IxxPSF') + self.IxyPSF= kwargs.get('IxyPSF') + self.IyyPSF= kwargs.get('IyyPSF') + self.psf_fwhm = kwargs.get('psf_fwhm') + self.bands = kwargs.get('bands') + + if not any(( + self.catalog_filters, + )): + raise ValueError('you need to specify catalog_filters for these checks, add an extendedness flag, a good flag and a magnitude range') + + self.enable_individual_summary = bool(kwargs.get('enable_individual_summary', True)) + self.enable_aggregated_summary = bool(kwargs.get('enable_aggregated_summary', False)) + self.always_show_plot = bool(kwargs.get('always_show_plot', True)) + + self.nbins = int(kwargs.get('nbins', 50)) + self.prop_cycle = None + + self.current_catalog_name = None + self.current_failed_count = None + self._aggregated_header = list() + self._aggregated_table = list() + self._individual_header = list() + self._individual_table = list() + + super(CheckEllipticity, self).__init__(**kwargs) + + def record_result(self, results, quantity_name=None, more_info=None, failed=None, individual_only=False): + if isinstance(results, dict): + self.current_failed_count += sum(1 for v in results.values() if v[1] == 'fail') + elif failed: + self.current_failed_count += 1 + + if self.enable_individual_summary: + if quantity_name is None: + self._individual_header.append(self.format_result_header(results, failed)) + else: + self._individual_table.append(self.format_result_row(results, quantity_name, more_info)) + + if self.enable_aggregated_summary and not individual_only: + if quantity_name is None: + results = '{} {}'.format(self.current_catalog_name, results) if self.current_catalog_name else results + self._aggregated_header.append(self.format_result_header(results, failed)) + else: + quantity_name = '{} {}'.format(self.current_catalog_name, quantity_name) if self.current_catalog_name else quantity_name + self._aggregated_table.append(self.format_result_row(results, quantity_name, more_info)) + + def format_result_row(self, results, quantity_name, more_info): + more_info = 'title="{}"'.format(more_info) if more_info else '' + output = ['', '{0}'.format(quantity_name, more_info)] + output.append('') + return ''.join(output) + + @staticmethod + def format_result_header(results, failed=False): + return '{0}'.format(results, 'class="fail"' if failed else '') + + def plot_moments_band(self, Ixx,Ixy,Iyy,band,output_dir): + ''' + Plot moments for each band + ''' + plt.figure() + bins = np.logspace(-1.,1.5,100) + bins_mid = (bins[1:]+bins[:-1])/2. + Ixx_out, bin_edges = np.histogram(Ixx, bins=bins) + Ixy_out, bin_edges = np.histogram(Ixy, bins=bins) + Iyy_out, bin_edges = np.histogram(Iyy, bins=bins) + self.record_result((0,'moments_'+band),'moments_'+band,'moments_'+band+'.png') + plt.plot(bins_mid,Ixx_out,'b',label='Ixx') + plt.plot(bins_mid,Iyy_out,'r--',label='Iyy') + plt.plot(bins_mid,Ixy_out,'k-.',label='Ixy') + plt.yscale('log') + plt.xscale('log') + plt.title(band) + plt.legend() + plt.savefig(os.path.join(output_dir, 'moments_'+band+'.png')) + plt.close() + return + + def plot_ellipticities_band(self,e1,e2,band,output_dir): + ''' + Plot elliptiticies for each band + ''' + plt.figure() + bins = np.linspace(0.,1.,51) + bins_mid = (bins[1:]+bins[:-1])/2. 
+ e1_out, bin_edges = np.histogram(e1, bins=bins) + e2_out, bin_edges = np.histogram(e2, bins=bins) + self.record_result((0,'ell_'+band),'ell_'+band,'ell_'+band+'.png') + plt.plot(bins_mid,e1_out,'b',label='e1') + plt.plot(bins_mid,e2_out,'r--',label='e2') + plt.yscale('log') + #plt.xscale('log') + plt.title(band) + plt.legend() + plt.savefig(os.path.join(output_dir, 'ell_'+band+'.png')) + plt.close() + return + + def plot_psf(self,fwhm,band,output_dir): + ''' + Plot elliptiticies for each band + ''' + plt.figure() + bins = np.linspace(0.,1.5,201) + bins_mid = (bins[1:]+bins[:-1])/2. + fwhm_out, bin_edges = np.histogram(fwhm, bins=bins) + self.record_result((0,'psf_'+band),'psf_'+band,'psf_'+band+'.png') + plt.plot(bins_mid,fwhm_out,'b',label='psf') + plt.yscale('log') + plt.xlim([0.5,1.4]) + plt.title(band) + plt.legend() + plt.savefig(os.path.join(output_dir, 'psf_'+band+'.png')) + plt.close() + return + + + + + def generate_summary(self, output_dir, aggregated=False): + if aggregated: + if not self.enable_aggregated_summary: + return + header = self._aggregated_header + table = self._aggregated_table + else: + if not self.enable_individual_summary: + return + header = self._individual_header + table = self._individual_table + + with open(os.path.join(output_dir, 'SUMMARY.html'), 'w') as f: + f.write('\n') + + f.write('
<ul>\n') + + f.write('</ul><br>\n') + f.write('<table><thead><tr><td>Quantity</td></tr></thead><tbody>\n') + for line in table: + f.write(line) + f.write('</tbody></table>\n') + f.write('</body></html>
\n') + + if not aggregated: + self._individual_header.clear() + self._individual_table.clear() + + def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): + + all_quantities = sorted(map(str, catalog_instance.list_all_quantities(True))) + + self.prop_cycle = cycle(iter(plt.rcParams['axes.prop_cycle'])) + self.current_catalog_name = catalog_name + self.current_failed_count = 0 + galaxy_count = None + quantity_hashes = defaultdict(set) + + if rank==0: + self.record_result('Running galaxy number test on {} {}'.format( + catalog_name, + getattr(catalog_instance, 'version', ''), + individual_only=True, + )) + + if self.truncate_cat_name: + catalog_name = catalog_name.partition("_")[0] + version = getattr(catalog_instance, 'version', '') if not self.no_version else '' + + # create filter labels + filters=[] + for i, filt in enumerate(self.catalog_filters): + filters = filt['filters'] + + print(filters) + lgnd_loc_dflt ='best' + + + label_tot=[] + plots_tot=[] + + #quantities=[self.ra,self.dec,self.Ixx,self.Iyy,self.Ixy,self.IxxPSF, self.IyyPSF, self.IxyPSF] + + # doing everything per-band first of all + for band in self.bands: + quantities=[self.Ixx+'_'+band,self.Iyy+'_'+band,self.Ixy+'_'+band,self.IxxPSF+'_'+band, self.IyyPSF+'_'+band, self.IxyPSF+'_'+band, self.psf_fwhm+'_'+band] + quantities = tuple(quantities) + + + + # reading in the data + if len(filters) > 0: + catalog_data = catalog_instance.get_quantities(quantities,filters=filters,return_iterator=False, rank=rank, size=size) + else: + catalog_data = catalog_instance.get_quantities(quantities,return_iterator=False, rank=rank, size=size) + a = time.time() + + + data_rank={} + recvbuf={} + for quantity in quantities: + data_rank[quantity] = catalog_data[quantity] + print(len(data_rank[quantity])) + recvbuf[quantity] = send_to_master(data_rank[quantity],'double') + + if rank==0: + print(len(recvbuf[quantity])) + e1,e2 = shear_from_moments(recvbuf[self.Ixx+'_'+band],recvbuf[self.Ixy+'_'+band],recvbuf[self.Iyy+'_'+band]) + e1psf,e2psf = shear_from_moments(recvbuf[self.IxxPSF+'_'+band],recvbuf[self.IxyPSF+'_'+band],recvbuf[self.IyyPSF+'_'+band]) + + Ixx = recvbuf[self.Ixx+'_'+band] + Iyy = recvbuf[self.Iyy+'_'+band] + Ixy = recvbuf[self.Ixy+'_'+band] + fwhm = recvbuf[self.psf_fwhm+'_'+band] + + + self.plot_moments_band(Ixx,Ixy,Iyy,band,output_dir) + self.plot_ellipticities_band(e1,e2,band,output_dir) + self.plot_psf(fwhm,band,output_dir) + + + # plot moments directly per filter. For good, star, galaxy + # FWHM of the psf + # calculate ellpiticities and make sure they're alright + # look at different bands + # note that we want to look by magnitude or SNR to understand the longer tail in moments + # PSF ellipticity whisker plot? 
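For orientation, the per-quantity loop above reads each rank's share of the catalog and then collects it on rank 0 before any ellipticities are computed or plotted. A rough standalone illustration of that gather-to-root pattern in plain mpi4py (this is not descqa's parallel.send_to_master, just a sketch of the same idea) would be:

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

# stand-in for one column of catalog_data held by this rank
local_chunk = np.arange(5, dtype='float64') + 5 * rank

# rank 0 receives a list of per-rank arrays; every other rank receives None
gathered = comm.gather(local_chunk, root=0)

if rank == 0:
    full_column = np.concatenate(gathered)
    print(len(full_column))   # mirrors the print(len(recvbuf[quantity])) check above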
+ # look at what validate_drp is + + # s1/s2 plots + + # look at full ellipticity distribution test as well + #https://github.com/LSSTDESC/descqa/blob/master/descqa/EllipticityDistribution.py + #DC2 validation github - PSF ellipticity + # https://github.com/LSSTDESC/DC2-analysis/blob/master/validation/Run_1.2p_PSF_tests.ipynb + + #https://github.com/LSSTDESC/DC2-analysis/blob/master/validation/DC2_calexp_src_validation_1p2.ipynb + + # Look at notes here: + #https://github.com/LSSTDESC/DC2-production/issues/340 + + # get PSF FWHM directly from data, note comments on here: + # https://github.com/LSSTDESC/DC2-analysis/blob/u/wmwv/DR6_dask_refactor/validation/validate_dc2_run2.2i_object_table_dask.ipynb about focussing of the "telescope" + + + '''mask_finite = np.isfinite(e1)&np.isfinite(e2) + bs_out = bs(e1[mask_finite],values = e2[mask_finite],bins=100,statistic='mean') + plt.figure() + quantity_hashes[0].add('s1s2') + self.record_result((0,'s1s2'),'s1s2','p_s1s2.png') + plt.plot(bs_out[1][1:],bs_out[0]) + plt.savefig(os.path.join(output_dir, 'p_s1s2.png')) + plt.close() + + + plt.figure() + quantity_hashes[0].add('s1') + self.record_result((0,'s1'),'s1','p_s1.png') + #plt.hist(e1,bins=np.linspace(-1.,1.,100)) + plt.hist(e1psf,bins=100)#np.linspace(-1.,1.,100)) + plt.savefig(os.path.join(output_dir, 'p_s1.png')) + plt.close() + plt.figure() + quantity_hashes[0].add('s2') + self.record_result((0,'s2'),'s2','p_s2.png') + #plt.hist(e2,bins=np.linspace(-1.,1.,100)) + plt.hist(e2psf,bins=100)#np.linspace(-1.,1.,100)) + plt.savefig(os.path.join(output_dir, 'p_s2.png')) + plt.close()''' + '''plt.figure() + quantity_hashes[0].add('s12') + self.record_result((0,'s12'),'s12','p_s12.png') + plt.hist2d(e1,e2,bins=100) + plt.savefig(os.path.join(output_dir, 'p_s12.png')) + plt.close()''' + + if rank==0: + self.generate_summary(output_dir) + else: + self.current_failed_count=0 + + self.current_failed_count = comm.bcast(self.current_failed_count, root=0) + + return TestResult(passed=(self.current_failed_count == 0), score=self.current_failed_count) + + def conclude_test(self, output_dir): + self.generate_summary(output_dir, aggregated=True) diff --git a/descqarun/master.py b/descqarun/master.py index 1c5aa5fb..20d92668 100644 --- a/descqarun/master.py +++ b/descqarun/master.py @@ -451,7 +451,6 @@ def main(): sys.path = [make_path_absolute(path) for path in args.paths] + sys.path - global GCRCatalogs #pylint: disable=W0601 sys.path.insert(0,'/global/homes/p/plarsen/plarsen_git/gcr-catalogs') GCRCatalogs = importlib.import_module('GCRCatalogs') diff --git a/run_master_parallel.sh b/run_master_parallel.sh index 5b6ef63a..8b18226a 100755 --- a/run_master_parallel.sh +++ b/run_master_parallel.sh @@ -1,5 +1,4 @@ #!/bin/bash - # go to a subshell ( @@ -7,9 +6,14 @@ #set -e # activate DESC python environment -source /global/common/software/lsst/common/miniconda/setup_current_python.sh "" +#source /global/common/software/lsst/common/miniconda/setup_current_python.sh "" +#source /cvmfs/sw.lsst.eu/linux-x86_64/lsst_distrib/w_2022_41/loadLSST.bash "" +#source /global/common/software/lsst/common/miniconda/kernels/stack-weekly.sh "" PYTHON='python' - +#echo $PYTHONPATH > a.out +source /global/homes/p/plarsen/plarsen_git/lsst_stack/loadLSST.bash +setup lsst_distrib +#echo $PYTHONPATH > b.out # set output directory OUTPUTDIR="/global/cfs/cdirs/lsst/groups/CS/descqa/run/v2" @@ -17,9 +21,12 @@ OUTPUTDIR="/global/cfs/cdirs/lsst/groups/CS/descqa/run/v2" set -o noglob # run master.py -OMP_NUM_THREADS=1 +OMP_NUM_THREADS=8 
+#CMD="/global/common/software/lsst/common/miniconda/start-kernel-cli.py desc-stack-weekly" +#CMD2="import sys; sys.path.insert(0,'/global/homes/p/plarsen/plarsen_git/descqa'); sys.path.insert(0,'/global/homes/p/plarsen/plarsen_git/gcr-catalogs'); sys.path.insert(0,'/global/homes/p/plarsen/plarsen_git/generic-catalog-reader'); sys.path.insert(0,'/global/homes/p/plarsen/plarsen_git/easyquery'); import GCR; import GCRCatalogs; import descqarun; descqarun.main()" +#mpirun -n 1 CMD="import descqarun; descqarun.main()" -mpirun -n 1 $PYTHON -E -c "$CMD" "$OUTPUTDIR" "$@" +mpirun -n 8 /global/homes/p/plarsen/plarsen_git/lsst_stack/conda/miniconda3-py38_4.9.2/envs/lsst-scipipe-4.0.0/bin/python -c "$CMD" "$OUTPUTDIR" "$@" #export OMP_NUM_THREADS=8 # end subshell diff --git a/run_master_slurm.sh b/run_master_slurm.sh index 435a51c3..3a2c7e2a 100755 --- a/run_master_slurm.sh +++ b/run_master_slurm.sh @@ -10,7 +10,7 @@ echo "--------- Running SRUN script ---------" ( # make sure all commands are executed -set -e +#set -e # activate DESC python environment source /global/common/software/lsst/common/miniconda/setup_current_python.sh "" @@ -24,8 +24,8 @@ set -o noglob # run master.py CMD="import descqarun; descqarun.main()" -export OMP_NUM_THREADS=1 -mpiexec -hostfile $HOSTFILE -n 64 $PYTHON -E -c "$CMD" "$OUTPUTDIR" "$@" +export OMP_NUM_THREADS=8 +mpiexec -hostfile $HOSTFILE -n 8 $PYTHON -E -c "$CMD" "$OUTPUTDIR" "$@" # end subshell ) diff --git a/setup.py b/setup.py index 795ed156..9a5f332e 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ 'Programming Language :: Python :: 3.6', ], keywords='DESCQA', - packages=['descqa'] # PL note: need to update version requirement for GCR + packages=['descqa'], install_requires=['future', 'pyyaml', 'jinja2'], extras_require={ 'full': ['numpy', 'scipy', 'matplotlib', 'GCR>=0.8.7', 'healpy', 'treecorr', 'camb', 'scikit-learn', 'pandas', 'astropy', 'POT', 'numba',