diff --git a/examples/example_run.py b/examples/example_run.py index 18907d6..c935bd6 100644 --- a/examples/example_run.py +++ b/examples/example_run.py @@ -22,12 +22,12 @@ def main(): # run the test results = sys_test(data, data2=data2, config=stile_args) - fig = sys_test.plot(results) - fig.savefig(sys_test.short_name+'.png') + #fig = sys_test.plot(results) + #fig.savefig(sys_test.short_name+'.png') stile.WriteASCIITable('realshear.dat',results) print "Done with unbinned systematics test" - + # do with binning data = dh.getData(data_ids[0],'galaxy lens','single','field','table') # turns a list of binning schemes into a pseudo-nested list of single bins @@ -38,7 +38,6 @@ def main(): for bin_list in expanded_bin_list: bins_name = '-'.join([bl.short_name for bl in bin_list]) data2 = dh.getData(data_ids[1],'galaxy','single','field','table',bin_list=bin_list) - results = sys_test(data, data2=data2, config=stile_args) stile.WriteASCIITable('realshear-'+bins_name+'.dat',results) fig = sys_test.plot(results) @@ -47,4 +46,4 @@ def main(): if __name__=='__main__': main() - + diff --git a/stile/sys_tests.py b/stile/sys_tests.py index 36cc855..9fb3cbd 100644 --- a/stile/sys_tests.py +++ b/stile/sys_tests.py @@ -1,6 +1,23 @@ """@file sys_tests.py Contains the class definitions of the Stile systematics tests. """ + +""" +This file contains some code from the AstroML package (http://github.com/astroML/astroML). +For that code: + +Copyright (c) 2012-2013, Jacob Vanderplas +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + + import numpy import stile import stile_utils @@ -128,24 +145,27 @@ def __init__(self, t_field=None, t_title=None, x_field=None, x_title=None, def CorrelationFunctionSysTest(type=None): """ - Initialize an instance of a BaseCorrelationFunctionSysTest type, based on the 'type' kwarg + Initialize an instance of a BaseCorrelationFunctionSysTest type, based on the 'type' kwarg given. Options are: - - GalaxyShear: tangential and cross shear of 'galaxy' type objects around 'galaxy lens' - type objects (point-shear correlation function) + - GalaxyShear: tangential and cross shear of 'galaxy' type objects around 'galaxy lens' + type objects - BrightStarShear: tangential and cross shear of 'galaxy' type objects around 'star bright' - type objects (point-shear) - - StarXGalaxyDensity: number density of 'galaxy' objects around 'star' objects (point-point) - - StarXGalaxyShear: shear-shear cross correlation of 'galaxy' and 'star' type objects - (shear-shear) - - StarXStarShear: autocorrelation of the shapes of 'star' type objects (shear-shear) - - StarXStarSize: autocorrelation of the size residuals for 'star' type objects relative to - PSF sizes (scalar-scalar) - - GalaxyDensityCorrelation: position autocorrelation of 'galaxy' type objects (point-point) - - StarDensityCorrelation: position autocorrelation of 'star' type objects (point-point) - - Rho1: rho1 statistics (autocorrelation of residual star shapes, shear-shear) - - None: an empty BaseCorrelationFunctionSysTest class instance, which can be used for - multiple types of correlation functions. See the documentation for - BaseCorrelationFunctionSysTest for more details. Note that this type has a + type objects + - StarXGalaxyDensity: number density of 'galaxy' objects around 'star' objects + - StarXGalaxyShear: shear-shear cross correlation of 'galaxy' and 'star' type objects + - StarXStarShear: autocorrelation of the shapes of 'star' type objects + - StarXStarSizeResidual: autocorrelation of the size residuals for 'star' type objects + relative to PSF sizes + - GalaxyDensityCorrelation: position autocorrelation of 'galaxy' type objects + - StarDensityCorrelation: position autocorrelation of 'star' type objects + - Rho1: rho1 statistics (autocorrelation of residual star shapes) + - Rho2: rho2 statistics (correlation of star and PSF shapes) + - Rho3: rho3 statistics (autocorrelation of star shapes weighted by the residual size) + - Rho4: rho4 statistics (correlation of residual star shapes weighted by residual size) + - Rho5: rho5 statistics (correlation of star and PSF shapes weighted by the residual size) + - None: an empty BaseCorrelationFunctionSysTest class instance, which can be used for + multiple types of correlation functions. See the documentation for + BaseCorrelationFunctionSysTest for more details. Note that this type has a slightly different call signature than the other methods (with the correlation function type given as the first argument) and that it lacks many of the convenience variables the other CorrelationFunctions have, such as self.objects_list and self.required_quantities. @@ -189,31 +209,65 @@ def CorrelationFunctionSysTest(type=None): return StarXGalaxyShearSysTest() elif type=='StarXStarShear': return StarXStarShearSysTest() - elif type=='StarXStarSize': - return StarXStarSizeSysTest() + elif type=='StarXStarSizeResidual': + return StarXStarSizeResidualSysTest() elif type=='GalaxyDensityCorrelation': return GalaxyDensityCorrelationSysTest() elif type=='StarDensityCorrelation': return StarDensityCorrelationSysTest() elif type=='Rho1': return Rho1SysTest() + elif type=='Rho2': + return Rho2SysTest() + elif type=='Rho3': + return Rho3SysTest() + elif type=='Rho4': + return Rho4SysTest() + elif type=='Rho5': + return Rho5SysTest() else: raise ValueError('Unknown correlation function type %s given to type kwarg'%type) - - + + class BaseCorrelationFunctionSysTest(SysTest): """ A base class for the Stile systematics tests that use correlation functions. This implements the class method getCF(), which runs a TreeCorr correlation function on a given set of data. Exact - arguments to this method should be created by child classes of BaseCorrelationFunctionSysTest; - see the docstring for BaseCorrelationFunctionSysTest.getCF() for information on how to write + arguments to this method should be created by child classes of BaseCorrelationFunctionSysTest; + see the docstring for BaseCorrelationFunctionSysTest.getCF() for information on how to write further tests using it. """ short_name = 'corrfunc' # Set the details (such as field names and titles) for all the possible plots generated by # TreeCorr - plot_details = [PlotDetails(t_field='omega', t_title='$\omega$', - sigma_field='sig_omega', y_title="$\omega$"), # n2 + plot_details = [ + PlotDetails(t_field='gamT', t_title=r'$\langle \gamma_T \rangle$', + x_field='gamX', x_title=r'$\langle \gamma_X \rangle$', + datarandom_t_field='gamT_', datarandom_t_title='$\gamma_{T', + datarandom_x_field='gamX_', datarandom_x_title='$\gamma_{X', + sigma_field='sigma', y_title="$\gamma$"), # ng + PlotDetails(t_field='xip', t_title=r'$\xi_+$', x_field='xim', x_title=r'$\xi_-$', + t_im_field='xip_im', t_im_title=r'$\xi_{+,im}$', + x_im_field='xip_im', x_im_title=r'$\xi_{-,im}$', + sigma_field='sigma_xi', y_title=r"$\xi$"), # gg + PlotDetails(t_field='kappa', t_title=r'$\langle \kappa \rangle$', + datarandom_t_field='kappa_', datarandom_t_title='$kappa_{', + sigma_field='sigma', y_title="$\kappa$"), # nk + PlotDetails(t_field='xi', t_title=r'$\xi$', sigma_field='sigma_xi', y_title=r"$\xi$"), # n2 or k2 + PlotDetails(t_field='kgamT', t_title=r'$\langle \kappa \gamma_T\rangle$', + x_field='kgamX', x_title=r'$\langle \kappa \gamma_X\rangle$', + datarandom_t_field='kgamT_', datarandom_t_title=r'$\kappa \gamma_{T', + datarandom_x_field='kgamX_', datarandom_x_title=r'$\kappa \gamma_{X', + sigma_field='sigma', y_title="$\kappa\gamma$"), # kg + PlotDetails(t_field='Mapsq', t_title=r'$\langle M_{ap}^2 \rangle$', + x_field='Mxsq', x_title=r'$\langle M_x^2\rangle$', + t_im_field='MMxa', t_im_title=r'$\langle MM_x \rangle(a)$', + x_im_field='Mmxb', x_im_title=r'$\langle MM_x \rangle(b)$', + sigma_field='sig_map', y_title="$M_{ap}^2$"), # m2 + PlotDetails(t_field='NMap', t_title=r'$\langle NM_{ap} \rangle$', + x_field='NMx', x_title=r'$\langle NM_{x} \rangle$', + sigma_field='sig_nmap', y_title="$NM_{ap}$"), # nm or norm + # For TreeCorr versions <= 3.1, these had different column names. PlotDetails(t_field='', t_title=r'$\langle \gamma_T \rangle$', x_field='', x_title=r'$\langle \gamma_X \rangle$', datarandom_t_field='gamT_', datarandom_t_title='$\gamma_{T', @@ -239,7 +293,7 @@ class method getCF(), which runs a TreeCorr correlation function on a given set sigma_field='sig_map', y_title="$M_{ap}^2$"), # m2 PlotDetails(t_field='', t_title=r'$\langle NM_{ap} \rangle$', x_field='', x_title=r'$\langle NM_{x} \rangle$', - sigma_field='sig_nmap', y_title="$NM_{ap}$") # nm or norm + sigma_field='sig_nmap', y_title="$NM_{ap}$"), # nm or norm ] def makeCatalog(self, data, config=None, use_as_k=None, use_chip_coords=False): @@ -495,11 +549,10 @@ def plot(self, data, colors=['r', 'b'], log_yscale=False, return None fields = data.dtype.names # Pick which radius measurement to use - for t_r in ['', 'R_nominal', 'R']: - is_r = [t_r in f for f in fields] - if any(is_r): - indx = is_r.index(True) - r = fields[indx] + # TreeCorr changed the name of the output columns + for t_r in ['meanR', 'R_nom', '', 'R_nominal', 'R']: + if t_r in fields: + r = t_r break else: raise ValueError('No radius parameter found in data') @@ -601,7 +654,7 @@ def plot(self, data, colors=['r', 'b'], log_yscale=False, def __call__(self, *args, **kwargs): return self.getCF(*args, **kwargs) - + class GalaxyShearSysTest(BaseCorrelationFunctionSysTest): """ @@ -720,6 +773,170 @@ def __call__(self, data, data2=None, random=None, random2=None, config=None, **k return self.getCF('gg', new_data, new_data2, new_random, new_random2, config=config, **kwargs) +class Rho2SysTest(BaseCorrelationFunctionSysTest): + """ + Compute the correlation of PSF shapes with residual star shapes (star shapes - psf shapes). + """ + short_name = 'rho2' + long_name = 'Rho2 statistics (Correlation of PSF shapes with star-PSF shapes)' + objects_list = ['star PSF'] + required_quantities = [('ra', 'dec', 'g1', 'g2', 'psf_g1', 'psf_g2', 'w')] + + def __call__(self, data, data2=None, random=None, random2=None, config=None, **kwargs): + new_data = numpy.rec.fromarrays([data['ra'], data['dec'], data['psf_g1'], + data['psf_g2'], data['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + if data2 is None: + data2 = data + new_data2 = numpy.rec.fromarrays([data2['ra'], data2['dec'], data2['g1']-data2['psf_g1'], + data2['g2']-data2['psf_g2'], data2['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + if random is not None: + new_random = numpy.rec.fromarrays([random['ra'], random['dec'], random['psf_g1'], + random['psf_g2'], random['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + + else: + new_random = random + if random2 is None: + random2 = random + if random2 is not None: + new_random2 = numpy.rec.fromarrays([data2['ra'], data2['dec'], + data2['g1']-data2['psf_g1'], + data2['g2']-data2['psf_g2'], data2['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + else: + new_random2 = random2 + return self.getCF('gg', new_data, new_data2, new_random, new_random2, + config=config, **kwargs) + + +class Rho3SysTest(BaseCorrelationFunctionSysTest): + """ + Compute the correlation of star shapes weighted by the residual size. + """ + short_name = 'rho3' + long_name = 'Rho3 statistics (Auto-correlation of star shapes weighted by the residual size)' + objects_list = ['star PSF'] + required_quantities = [('ra', 'dec', 'sigma', + 'psf_g1', 'psf_g2', 'psf_sigma', 'w')] + + def __call__(self, data, data2=None, random=None, random2=None, config=None, **kwargs): + new_data = numpy.rec.fromarrays([data['ra'], data['dec'], + data['psf_g1']*(data['sigma']-data['psf_sigma'])/data['psf_sigma'], + data['psf_g2']*(data['sigma']-data['psf_sigma'])/data['psf_sigma'], + data['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + if data2 is not None: + new_data2 = numpy.rec.fromarrays([data2['ra'], data2['dec'], + data2['psf_g1']*(data2['sigma']-data2['psf_sigma'])/data2['psf_sigma'], + data2['psf_g2']*(data2['sigma']-data2['psf_sigma'])/data2['psf_sigma'], + data2['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + else: + new_data2 = data2 + if random is not None: + new_random = numpy.rec.fromarrays([random['ra'], random['dec'], + random['psf_g1']*(random['sigma']-random['psf_sigma'])/random['psf_sigma'], + random['psf_g2']*(random['sigma']-random['psf_sigma'])/random['psf_sigma'], + random['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + else: + new_random = random + + if random2 is not None: + new_random2 = numpy.rec.fromarrays([random2['ra'], random2['dec'], + random2['psf_g1']*(random2['sigma']-random2['psf_sigma'])/random2['psf_sigma'], + random2['psf_g2']*(random2['sigma']-random2['psf_sigma'])/random2['psf_sigma'], + random2['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + else: + new_random2 = random2 + return self.getCF('gg', new_data, new_data2, new_random, new_random2, + config=config, **kwargs) + +class Rho4SysTest(BaseCorrelationFunctionSysTest): + """ + Compute the correlation of star shapes weighted by the residual size. + """ + short_name = 'rho4' + long_name = 'Rho4 statistics (Correlation of residual star shapes weighted by residual size)' + objects_list = ['star PSF'] + required_quantities = [('ra', 'dec', 'g1', 'g2', 'sigma', + 'psf_g1', 'psf_g2', 'psf_sigma', 'w')] + + def __call__(self, data, data2=None, random=None, random2=None, config=None, **kwargs): + new_data = numpy.rec.fromarrays([data['ra'], data['dec'], data['g1'] - data['psf_g1'], + data['g2']-data['psf_g2'], data['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + if data2 is None: + data2 = data + new_data2 = numpy.rec.fromarrays([data2['ra'], data2['dec'], + data2['psf_g1']*(data2['sigma']-data2['psf_sigma'])/data2['psf_sigma'], + data2['psf_g2']*(data2['sigma']-data2['psf_sigma'])/data2['psf_sigma'], + data2['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + if random is not None: + new_random = numpy.rec.fromarrays([random['ra'], random['dec'], + random['g1']-random['psf_g1'], + random['g2']-random['psf_g2'], random['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + else: + new_random = random + if random2 is None: + random2 = random + if random2 is not None: + new_random2 = numpy.rec.fromarrays([random2['ra'], random2['dec'], + random2['psf_g1']*(random2['sigma']-random2['psf_sigma'])/random2['psf_sigma'], + random2['psf_g2']*(random2['sigma']-random2['psf_sigma'])/random2['psf_sigma'], + random2['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + else: + new_random2 = random2 + return self.getCF('gg', new_data, new_data2, new_random, new_random2, + config=config, **kwargs) + +class Rho5SysTest(BaseCorrelationFunctionSysTest): + """ + Compute the correlation of star shapes weighted by the residual size. + """ + short_name = 'rho5' + long_name = 'Rho5 statistics (Correlation of star and PSF shapes weighted by residual size)' + objects_list = ['star PSF'] + required_quantities = [('ra', 'dec', 'sigma', + 'psf_g1', 'psf_g2', 'psf_sigma', 'w')] + + def __call__(self, data, data2=None, random=None, random2=None, config=None, **kwargs): + new_data = numpy.rec.fromarrays([data['ra'], data['dec'],data['psf_g1'], + data['psf_g2'], data['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + if data2 is None: + data2 = data + new_data2 = numpy.rec.fromarrays([data2['ra'], data2['dec'], + data2['psf_g1']*(data2['sigma']-data2['psf_sigma'])/data2['psf_sigma'], + data2['psf_g2']*(data2['sigma']-data2['psf_sigma'])/data2['psf_sigma'], + data2['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + if random is not None: + new_random = numpy.rec.fromarrays([random['ra'], random['dec'], + random['psf_g1'], random['psf_g2'], random['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + else: + new_random = random + if random2 is None: + random2 = random + if random2 is not None: + new_random2 = numpy.rec.fromarrays([random2['ra'], random2['dec'], + random2['psf_g1']*(random2['sigma']-random2['psf_sigma'])/random2['psf_sigma'], + random2['psf_g2']*(random2['sigma']-random2['psf_sigma'])/random2['psf_sigma'], + random2['w']], + names = ['ra', 'dec', 'g1', 'g2', 'w']) + else: + new_random2 = random2 + return self.getCF('gg', new_data, new_data2, new_random, new_random2, + config=config, **kwargs) + + class GalaxyDensityCorrelationSysTest(BaseCorrelationFunctionSysTest): """ Compute the galaxy position autocorrelations. @@ -919,7 +1136,7 @@ def __call__(self, array, percentiles=None, field=None, verbose=False, ignore_ba # Return. return result -def WhiskerPlotSysTest(type=None): +def WhiskerPlotSysTest(type=None): """ Initialize an instance of a BaseWhiskerPlotSysTest class, based on the 'type' kwarg given. Options are: @@ -942,7 +1159,7 @@ def WhiskerPlotSysTest(type=None): return BaseWhiskerPlotSysTest() else: raise ValueError('Unknown whisker plot type %s given to type kwarg'%type) - + class BaseWhiskerPlotSysTest(SysTest): short_name = 'whiskerplot' """ @@ -1107,7 +1324,437 @@ def __call__(self, array, linewidth=0.01, scale=None, figsize=None, size_label=r'$\sigma$ [pixel]', xlim=xlim, ylim=ylim, equal_axis=True) -def ScatterPlotSysTest(type=None): +class HistogramSysTest(SysTest): + """ + A base class for Stile systematics tests that generate histograms. + + Like the :class:`StatSysTest`, :class:`HistogramSysTest` has a number of options which can be + set either upon initialization or at runtime. When set at initialization, the options will hold + for any call to the object that doesn't explicitly override them; when set during a call, the + options will hold only for that call. + + See the documentation for the method :func:`HistoPlot` for a list of available kwargs. + """ + + short_name = 'histogram' + def __init__(self, binning_style='manual', nbins=50, + weights=None, limits=None, figsize=None, normed=False, + histtype='stepfilled', xlabel=None, ylabel=None, + xlim=None, ylim=None, hide_x=False, hide_y=False, + cumulative=False, align='mid', rwidth=0.9, + log=False, color='k', alpha=1.0, + text=None, text_x=0.90, text_y=0.90, fontsize=12, + linewidth=2.0, vlines=None, vcolor='k'): + self.binning_style = binning_style + self.nbins = nbins + self.weights = weights + self.limits = limits + self.figsize = figsize + self.normed = normed + self.histtype = histtype + self.xlabel = xlabel + self.ylabel = ylabel + self.xlim = xlim + self.ylim = ylim + self.hide_x = hide_x + self.hide_y = hide_y + self.cumulative = cumulative + self.align = align + self.rwidth = rwidth + self.log = log + self.color = color + self.alpha = alpha + self.text = text + self.text_x = text_x + self.text_y = text_y + self.fontsize = fontsize + self.linewidth = linewidth + self.vlines = vlines + self.vcolor = vcolor + + def get_param_value(self, param, ii, data_dim, multihist=False): + if type(param) is list and multihist: + if len(param) == data_dim: + param_use = param[ii] + else: + param_use = param[0] + elif type(param) is list: + param_use = param[0] + else: + param_use = param + return param_use + + """ + The Scott rule for bin size + This function is directly copied from the astroML library + (astroMl/density_estimation/histtool.py) + """ + def scotts_bin_width(self, data, return_bins=False): + r"""Return the optimal histogram bin width using Scott's rule: + + Parameters + ---------- + data : array-like, ndim=1 + observed (one-dimensional) data + return_bins : bool (optional) + if True, then return the bin edges + + Returns + ------- + width : float + optimal bin width using Scott's rule + bins : ndarray + bin edges: returned if `return_bins` is True + + Notes + ----- + The optimal bin width is + + .. math:: + \Delta_b = \frac{3.5\sigma}{n^{1/3}} + + where :math:`\sigma` is the standard deviation of the data, and + :math:`n` is the number of data points. + + See Also + -------- + knuth_bin_width + freedman_bin_width + astroML.plotting.hist + """ + data = numpy.asarray(data) + if data.ndim != 1: + raise ValueError("data should be one-dimensional") + + n = data.size + sigma = numpy.std(data) + + dx = 3.5 * sigma * 1. / (n ** (1. / 3)) + + if return_bins: + Nbins = numpy.ceil((data.max() - data.min()) * 1. / dx) + Nbins = max(1, Nbins) + bins = data.min() + dx * numpy.arange(Nbins + 1) + return dx, bins + else: + return dx + + """ + The Freedman-Diaconis rule of bin size + This function is directly copied from the astroML library + (astroMl/density_estimation/histtool.py) + """ + def freedman_bin_width(self, data, return_bins=False): + r"""Return the optimal histogram bin width using the Freedman-Diaconis + rule + + Parameters + ---------- + data : array-like, ndim=1 + observed (one-dimensional) data + return_bins : bool (optional) + if True, then return the bin edges + + Returns + ------- + width : float + optimal bin width using Scott's rule + bins : ndarray + bin edges: returned if `return_bins` is True + + Notes + ----- + The optimal bin width is + + .. math:: + \Delta_b = \frac{2(q_{75} - q_{25})}{n^{1/3}} + + where :math:`q_{N}` is the :math:`N` percent quartile of the data, and + :math:`n` is the number of data points. + + See Also + -------- + knuth_bin_width + scotts_bin_width + astroML.plotting.hist + """ + data = numpy.asarray(data) + if data.ndim != 1: + raise ValueError("data should be one-dimensional") + + n = data.size + if n < 4: + raise ValueError("data should have more than three entries") + + dsorted = numpy.sort(data) + v25 = dsorted[n / 4 - 1] + v75 = dsorted[(3 * n) / 4 - 1] + + dx = 2 * (v75 - v25) * 1. / (n ** (1. / 3)) + + if return_bins: + Nbins = numpy.ceil((dsorted[-1] - dsorted[0]) * 1. / dx) + Nbins = max(1, Nbins) + bins = dsorted[0] + dx * numpy.arange(Nbins + 1) + return dx, bins + else: + return dx + + """ + Generate the histogram + """ + # All of these defaults are None because they're set in the initalization and we want to be able + # to tell the difference between "I don't care, use the default" and "override initialization, + # use this value". Otherwise there could be a conflict for kwargs that have non-None defaults. + def HistoPlot(self, data_list, binning_style=None, nbins=None, + weights=None, limits=None, figsize=None, normed=None, + histtype=None, xlabel=None, ylabel=None, + xlim=None, ylim=None, hide_x=None, hide_y=None, + cumulative=None, align=None, rwidth=None, + log=None, color=None, alpha=None, + text=None, text_x=None, text_y=None, fontsize=None, + linewidth=None, vlines=None, vcolor=None): + + """ + Draw a histogram and return a `matplotlib.figure.Figure` object. + + This method has a bunch of options for controlling the appearance of + the histogram, which are explained below. + + @param data_list The 1-Dimension NumPy array or a list of Numpy arrays + for plotting histograms. + + @param binning_style Different selections of Histogram bin size: + = 'scott' : Using the Scott's rule to decide the bin size. + = 'freedman': Using the Freedman-Diaconis rule to decide the bin + size. + = 'manual' : Manually select a fixed number of bins. + [default: binning_style='manual'] + + @param nbins The number of bins if binning_style = 'manual' is selected. + [Default: nbins = 50] + @param weights An array of weights, or True to use the 'w' column from + the data array. [Default: None] + @param limits The [min, max] limits to trim the data before the + histogram is made. + [Default: limits = None] + @param normed Whether the normalized histogram is shown. + [Default: normed = False] + @param cumulative Whether the cumulative histogram is shown. + [Default: cumulative = False] + @param histtype The type of histogram to show: + histtype = 'bar' : Tradition bar-type histogram. + histtype = 'step' : Unfilled lineplot-type histogram. + histtype = 'stepfilled' : Filled lineplot-type histogram. + [Default: histtype = 'stepfilled'] + @param align Where the bars are centered relative to bin edges + = 'left', 'mid', or 'right'. + [Default: align = 'mid' ] + @param rwidth The relative width of the bars as a fraction of the + bin width. Ignored for histtype = 'step' or + 'stepfilled'. + [Default = None] + @param log If True, the histogram axis will be set to a log scale. + [Default = False] + @param color Color of the histogram. + [Default = None, which will use the standard color sequence] + @param figsize Size of a figure (x, y) in units of inches.. + [Default: None, meaning use the default value of matplotlib] + @param xlabel The x-axis label. + [Default: None, meaning do not show a label for the x-axis] + @param ylabel The y-axis label. + [Default: None, meaning do not show a label for the y-axis] + @param xlim Limits of x-axis (min, max). + [Default: None, meaning do not set any limits for x] + @param ylim Limits of y-axis (min, max). + [Default: None, meaning do not set any limits for y] + @param hide_x Whether hide the labels for x-axis. + [Default: hide_x = False] + @param hide_y Whether hide the labels for y-axis. + [Default: hide_y = False] + @param alpha 0.0 transparent through 1.0 opaque + [Default: alpha = 1.0] + @param linewidth With of the vertical lines + [Default: linewidth = 2.0] + @param text Text to put on the figure + [Default: None] + @param text_x The X-coordinate of the text on the plot + [Default: text_x = 0.9] + @param text_y The Y-coordinate of the test on the plot + [Default: text_y = 0.9] + @param fontsize Font size of the text + [Default: fontsize = 12] + @param vlines Locations to plot vertical lines to indicate interesting + values. + [Default: None] + @param vcolor Color or a list of color for vertical lines to plot. + [Default: 'k'] + + @returns a matplotlib.figure.Figure object. + """ + + # Get defaults from the class attributes if necessary + for key_name in ['binning_style', 'nbins', 'weights', 'limits', 'figsize', 'normed', + 'histtype', 'xlabel', 'ylabel', 'xlim', 'ylim', 'hide_x', 'hide_y', + 'cumulative', 'align', 'rwidth', 'log', 'color', 'alpha', 'text', + 'text_x', 'text_y', 'fontsize', 'linewidth', 'vlines', 'vcolor']: + exec('if %s is None: %s = self.%s'%(key_name, key_name, key_name)) + + ## Define the plot + hist = plt.figure(figsize=figsize) + ax = hist.add_subplot(1, 1, 1) + + data_dim = len(data_list) + for ii in range(data_dim): + + if type(data_list[0]) is list or type(data_list[0]) is numpy.ndarray: + multihist = True + data = data_list[ii] + else: + multihist = False + data = data_list + + # mask data with NaN + data = data[numpy.isnan(data) == False] + data = numpy.asarray(data) + + # trim the data if necessary + if limits is not None: + data = data[(data >= limits[0]) & (data <= limits[1])] + + # decide which bin style to use + style_use = self.get_param_value(binning_style, ii, data_dim, + multihist=multihist) + + # now support constant bin size, Scott rule, and Freedman rule + if style_use in ['scott', 'freedman', 'manual']: + if (style_use is 'scott'): + "Use the Scott rule" + dx, bins = self.scotts_bin_width(data, True) + elif style_use is 'freedman': + "Use the Freedman rule" + dx, bins = self.freedman_bin_width(data, True) + elif style_use is 'manual': + bins = nbins + else: + print "Unrecognized code for binning style, use default instead!" + bins = nbins + + if weights is True: + weights = data['w'] + + # decide if weight is presented + if weights is not None and multihist: + if len(weights) == data_dim: + weight_use = weights[ii] + else: + import warnings + warnings.warn("Inconsistent shape between data and weights! No weight is used!") + weight_use = None + elif weights is not None: + if len(weights) == len(data): + weight_use = weights + else: + import warnings + warnings.warn("Inconsistent shape between data and weights! No weight is used!") + weight_use = None + else: + import warnings + warnings.warn("The format of given weights cannot be understood! No weight is used!") + weight_use = None + + # decide which histtype to use + hist_use = self.get_param_value(histtype, ii, data_dim, + multihist=multihist) + # the color of the histogram + color_use = self.get_param_value(color, ii, data_dim, + multihist=multihist) + # the transparency of the histogram + alpha_use = self.get_param_value(alpha, ii, data_dim, + multihist=multihist) + # the relative width of the bar + rwidth_use = self.get_param_value(rwidth, ii, data_dim, + multihist=multihist) + # the width of the vertical line + lwidth_use = self.get_param_value(linewidth, ii, data_dim, + multihist=multihist) + + # make the histogram + counts, edges, patches = ax.hist(data, bins, + weights = weight_use, + histtype = hist_use, + color = color_use, + normed = normed, + cumulative = cumulative, + alpha = alpha_use, + rwidth = rwidth_use, + log = log, + align = align, + linewidth = lwidth_use + ) + + # outline the filled region + if hist_use is 'stepfilled': + counts, edges, patches = ax.hist(data, bins, + weights = weight_use, + histtype = 'step', + color = 'k', + normed = normed, + cumulative = cumulative, + alpha = 1.0, + rwidth = rwidth_use, + log = log, + align = align, + linewidth = 1.0 + ) + + ymin, ymax = ax.get_ylim() + + if not multihist: + break + + # add the text when necessary + if text is not None: + ax.text(text_x, text_y, text, transform=ax.transAxes, + ha='right', va='top', fontsize=fontsize) + + # add vertical lines when necessary + if vlines is not None and not hasattr(vlines, '__iter__'): + for jj in range(len(vlines)): + vline_use = vlines[jj] + + if type(vcolor) == list: + if len(vcolor) == len(vlines): + vcolor_use = vcolor[jj] + else: + vcolor_use = vcolor[0] + else: + vcolor_use = vcolor + + ax.vlines(vline_use, ymin, ymax, colors=vcolor_use, + linestyle='dashed', + linewidth=1.8) + + if xlabel is not None: + ax.set_xlabel(xlabel) + if ylabel is not None: + ax.set_ylabel(ylabel) + + if xlim is not None: + ax.set_xlim(*xlim) + if ylim is not None: + ax.set_ylim(*ylim) + + if hide_x: + ax.xaxis.set_major_formatter(plt.NullFormatter()) + if hide_y: + ax.yaxis.set_major_formatter(plt.NullFormatter()) + + return hist + def __call__(self, *args, **kwargs): + return self.HistoPlot(*args, **kwargs) + +def ScatterPlotSysTest(type=None): """ Initialize an instance of a BaseScatterPlotSysTest class, based on the 'type' kwarg given. Options are: