diff --git a/README.rst b/README.rst index b4c38eaa..cd35ea35 100644 --- a/README.rst +++ b/README.rst @@ -47,7 +47,7 @@ plothist :target: https://badge.fury.io/py/plothist .. |Code style: black| image:: https://img.shields.io/badge/code%20style-black-000000.svg :target: https://github.com/psf/black -.. |Docs from latest| image:: https://img.shields.io/badge/docs-v0.9-blue.svg +.. |Docs from latest| image:: https://img.shields.io/badge/docs-v1.0-blue.svg :target: https://plothist.readthedocs.io/en/latest/ .. |Docs from main| image:: https://img.shields.io/badge/docs-main-blue.svg :target: https://plothist.readthedocs.io/en/main/ diff --git a/docs/conf.py b/docs/conf.py index 96a19554..6898a5ab 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -36,9 +36,9 @@ author = "Cyrille Praz, Tristan Fillinger" # The short X.Y version -version = "0.9" +version = "1.0" # The full version, including alpha/beta/rc tags -release = "0.9" +release = "1.0" # -- General configuration --------------------------------------------------- diff --git a/docs/examples/model_ex/model_all_comparisons.py b/docs/examples/model_ex/model_all_comparisons.py index d3b96ef0..897c2f4a 100644 --- a/docs/examples/model_ex/model_all_comparisons.py +++ b/docs/examples/model_ex/model_all_comparisons.py @@ -91,7 +91,7 @@ h1_label="Data", h2_label="Pred.", ratio_uncertainty_type="split", - hist_1_uncertainty_type="asymmetrical", + h1_uncertainty_type="asymmetrical", ) add_text( f' $\mathbf{{→}}$ comparison = "{comparison}"', ax=ax_comparison, fontsize=13 diff --git a/docs/examples/model_ex/model_all_comparisons_no_model_unc.py b/docs/examples/model_ex/model_all_comparisons_no_model_unc.py index d46f7cab..9097023c 100644 --- a/docs/examples/model_ex/model_all_comparisons_no_model_unc.py +++ b/docs/examples/model_ex/model_all_comparisons_no_model_unc.py @@ -99,7 +99,7 @@ h1_label="Data", h2_label="Pred.", ratio_uncertainty_type="split", - hist_1_uncertainty_type="asymmetrical", + h1_uncertainty_type="asymmetrical", ) if comparison == "pull": # Since the uncertainties of the model are neglected, the pull label is "(Data - Pred.)/sigma_Data" diff --git a/docs/examples/model_ex/model_comparisons_ratio_options.py b/docs/examples/model_ex/model_comparisons_ratio_options.py index b48dfdf2..3d1a211c 100644 --- a/docs/examples/model_ex/model_comparisons_ratio_options.py +++ b/docs/examples/model_ex/model_comparisons_ratio_options.py @@ -110,7 +110,7 @@ h1_label="Data", h2_label="Pred.", ratio_uncertainty_type=ratio_uncertainty_type, - hist_1_uncertainty_type="asymmetrical", + h1_uncertainty_type="asymmetrical", ) add_text( f' $\mathbf{{→}}$ comparison = "ratio", \n $\mathbf{{→}}$ ratio_uncertainty_type="{ratio_uncertainty_type}", model_uncertainty = {model_uncertainty}', diff --git a/docs/examples/utility/matplotlib_vs_plothist_style.py b/docs/examples/utility/matplotlib_vs_plothist_style.py index a0dcdbaa..5d01b1d3 100644 --- a/docs/examples/utility/matplotlib_vs_plothist_style.py +++ b/docs/examples/utility/matplotlib_vs_plothist_style.py @@ -33,14 +33,14 @@ hist_0, bins, _ = ax1.hist( df["variable_0"], bins=20, histtype="step", linewidth=1.2, label="h1" ) - hist_1 = ax1.hist( + h1 = ax1.hist( df["variable_1"], bins=bins, histtype="step", linewidth=1.2, label="h2" ) ax1.set_ylabel("Entries") ax1.legend() # Calculate the ratio of histogram values and plot in the second subplot (ax2) - ratio = hist_0 / hist_1[0] # Divide bin values of variable_0 by variable_1 + ratio = hist_0 / h1[0] # Divide bin values of variable_0 by variable_1 bin_centers = 0.5 * (bins[:-1] + bins[1:]) # Calculate bin centers # Create fake error bars for the ratio diff --git a/plothist/__init__.py b/plothist/__init__.py index 7236d36f..b108a3d0 100644 --- a/plothist/__init__.py +++ b/plothist/__init__.py @@ -1,5 +1,5 @@ """Plot histograms in a scalable way and a beautiful style.""" -__version__ = "0.9" +__version__ = "1.0" from .plotters import ( create_comparison_figure, diff --git a/plothist/comparison.py b/plothist/comparison.py index 62b611d7..af841d9c 100644 --- a/plothist/comparison.py +++ b/plothist/comparison.py @@ -100,15 +100,15 @@ def _check_binning_consistency(hist_list): raise ValueError("The bins of the histograms must be equal.") -def get_ratio_variances(hist_1, hist_2): +def get_ratio_variances(h1, h2): """ - Calculate the variances of the ratio of two histograms (hist_1/hist_2). + Calculate the variances of the ratio of two histograms (h1/h2). Parameters ---------- - hist_1 : boost_histogram.Histogram + h1 : boost_histogram.Histogram The first histogram. - hist_2 : boost_histogram.Histogram + h2 : boost_histogram.Histogram The second histogram. Returns @@ -121,15 +121,13 @@ def get_ratio_variances(hist_1, hist_2): ValueError If the bins of the histograms are not equal. """ - - _check_binning_consistency([hist_1, hist_2]) - _check_binning_consistency([hist_1, hist_2]) + _check_binning_consistency([h1, h2]) np.seterr(divide="ignore", invalid="ignore") ratio_variances = np.where( - hist_2.values() != 0, - hist_1.variances() / hist_2.values() ** 2 - + hist_2.variances() * hist_1.values() ** 2 / hist_2.values() ** 4, + h2.values() != 0, + h1.variances() / h2.values() ** 2 + + h2.variances() * h1.values() ** 2 / h2.values() ** 4, np.nan, ) np.seterr(divide="warn", invalid="warn") @@ -137,18 +135,19 @@ def get_ratio_variances(hist_1, hist_2): return ratio_variances -def get_pull(hist_1, hist_2, hist_1_uncertainty_type="symmetrical"): +def get_pull(h1, h2, h1_uncertainty_type="symmetrical"): """ Compute the pull between two histograms. Parameters ---------- - hist_1 : boost_histogram.Histogram + h1 : boost_histogram.Histogram The first histogram. - hist_2 : boost_histogram.Histogram + h2 : boost_histogram.Histogram The second histogram. - hist_1_uncertainty_type : str, optional - What kind of bin uncertainty to use for hist_1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical". + h1_uncertainty_type : str, optional + What kind of bin uncertainty to use for h1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical". + Returns ------- comparison_values : numpy.ndarray @@ -158,23 +157,23 @@ def get_pull(hist_1, hist_2, hist_1_uncertainty_type="symmetrical"): comparison_uncertainties_high : numpy.ndarray The upper uncertainties on the pull. Always ones. """ - _check_uncertainty_type(hist_1_uncertainty_type) - _check_binning_consistency([hist_1, hist_2]) + _check_uncertainty_type(h1_uncertainty_type) + _check_binning_consistency([h1, h2]) - if hist_1_uncertainty_type == "asymmetrical": - uncertainties_low, uncertainties_high = get_asymmetrical_uncertainties(hist_1) - hist_1_variances = np.where( - hist_1.values() >= hist_2.values(), + if h1_uncertainty_type == "asymmetrical": + uncertainties_low, uncertainties_high = get_asymmetrical_uncertainties(h1) + h1_variances = np.where( + h1.values() >= h2.values(), uncertainties_low ** 2, uncertainties_high ** 2, ) - hist_1 = hist_1.copy() - hist_1[:] = np.c_[hist_1.values(), hist_1_variances] + h1 = h1.copy() + h1[:] = np.c_[h1.values(), h1_variances] comparison_values = np.where( - hist_1.variances() + hist_2.variances() != 0, - (hist_1.values() - hist_2.values()) - / np.sqrt(hist_1.variances() + hist_2.variances()), + h1.variances() + h2.variances() != 0, + (h1.values() - h2.values()) + / np.sqrt(h1.variances() + h2.variances()), np.nan, ) comparison_uncertainties_low = np.ones_like(comparison_values) @@ -187,18 +186,19 @@ def get_pull(hist_1, hist_2, hist_1_uncertainty_type="symmetrical"): ) -def get_difference(hist_1, hist_2, hist_1_uncertainty_type="symmetrical"): +def get_difference(h1, h2, h1_uncertainty_type="symmetrical"): """ Compute the difference between two histograms. Parameters ---------- - hist_1 : boost_histogram.Histogram + h1 : boost_histogram.Histogram The first histogram. - hist_2 : boost_histogram.Histogram + h2 : boost_histogram.Histogram The second histogram. - hist_1_uncertainty_type : str, optional - What kind of bin uncertainty to use for hist_1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical". + h1_uncertainty_type : str, optional + What kind of bin uncertainty to use for h1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical". + Returns ------- comparison_values : numpy.ndarray @@ -208,22 +208,22 @@ def get_difference(hist_1, hist_2, hist_1_uncertainty_type="symmetrical"): comparison_uncertainties_high : numpy.ndarray The upper uncertainties on the difference. """ - _check_uncertainty_type(hist_1_uncertainty_type) - _check_binning_consistency([hist_1, hist_2]) + _check_uncertainty_type(h1_uncertainty_type) + _check_binning_consistency([h1, h2]) - comparison_values = hist_1.values() - hist_2.values() + comparison_values = h1.values() - h2.values() - if hist_1_uncertainty_type == "asymmetrical": - uncertainties_low, uncertainties_high = get_asymmetrical_uncertainties(hist_1) + if h1_uncertainty_type == "asymmetrical": + uncertainties_low, uncertainties_high = get_asymmetrical_uncertainties(h1) comparison_uncertainties_low = np.sqrt( - uncertainties_low ** 2 + hist_2.variances() + uncertainties_low ** 2 + h2.variances() ) comparison_uncertainties_high = np.sqrt( - uncertainties_high ** 2 + hist_2.variances() + uncertainties_high ** 2 + h2.variances() ) else: - comparison_uncertainties_low = np.sqrt(hist_1.variances() + hist_2.variances()) + comparison_uncertainties_low = np.sqrt(h1.variances() + h2.variances()) comparison_uncertainties_high = comparison_uncertainties_low return ( @@ -233,17 +233,18 @@ def get_difference(hist_1, hist_2, hist_1_uncertainty_type="symmetrical"): ) -def get_asymmetry(hist_1, hist_2): +def get_asymmetry(h1, h2): """ - Get the asymmetry between two histograms hist_1 and hist_2, defined as (hist_1 - hist_2) / (hist_1 + hist_2). + Get the asymmetry between two histograms h1 and h2, defined as (h1 - h2) / (h1 + h2). Only symmetrical uncertainties are supported. Parameters ---------- - hist_1 : boost_histogram.Histogram + h1 : boost_histogram.Histogram The first histogram. - hist_2 : boost_histogram.Histogram + h2 : boost_histogram.Histogram The second histogram. + Returns ------- comparison_values : numpy.ndarray @@ -253,10 +254,10 @@ def get_asymmetry(hist_1, hist_2): comparison_uncertainties_high : numpy.ndarray The upper uncertainties on the asymmetry. """ - _check_binning_consistency([hist_1, hist_2]) + _check_binning_consistency([h1, h2]) - hist_sum = hist_1 + hist_2 - hist_diff = hist_1 + (-1 * hist_2) + hist_sum = h1 + h2 + hist_diff = h1 + (-1 * h2) comparison_values = np.where( hist_sum.values() != 0, hist_diff.values() / hist_sum.values(), np.nan ) @@ -269,24 +270,25 @@ def get_asymmetry(hist_1, hist_2): def get_ratio( - hist_1, - hist_2, + h1, + h2, + h1_uncertainty_type="symmetrical", ratio_uncertainty_type="uncorrelated", - hist_1_uncertainty_type="symmetrical", ): """ Compute the ratio between two histograms. Parameters ---------- - hist_1 : boost_histogram.Histogram + h1 : boost_histogram.Histogram The numerator histogram. - hist_2 : boost_histogram.Histogram + h2 : boost_histogram.Histogram The denominator histogram. + h1_uncertainty_type : str, optional + What kind of bin uncertainty to use for h1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical". ratio_uncertainty_type : str, optional - How to treat the uncertainties of the histograms: "uncorrelated" for simple comparison, "split" for scaling and split hist_1 and hist_2 uncertainties. Default is "uncorrelated". - hist_1_uncertainty_type : str, optional - What kind of bin uncertainty to use for hist_1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical". + How to treat the uncertainties of the histograms: "uncorrelated" for simple comparison, "split" for scaling and split h1 and h2 uncertainties. Default is "uncorrelated". + Returns ------- comparison_values : numpy.ndarray @@ -296,39 +298,39 @@ def get_ratio( comparison_uncertainties_high : numpy.ndarray The upper uncertainties on the ratio. """ - _check_uncertainty_type(hist_1_uncertainty_type) - _check_binning_consistency([hist_1, hist_2]) + _check_uncertainty_type(h1_uncertainty_type) + _check_binning_consistency([h1, h2]) comparison_values = np.where( - hist_2.values() != 0, hist_1.values() / hist_2.values(), np.nan + h2.values() != 0, h1.values() / h2.values(), np.nan ) - if hist_1_uncertainty_type == "asymmetrical": - uncertainties_low, uncertainties_high = get_asymmetrical_uncertainties(hist_1) + if h1_uncertainty_type == "asymmetrical": + uncertainties_low, uncertainties_high = get_asymmetrical_uncertainties(h1) if ratio_uncertainty_type == "uncorrelated": - if hist_1_uncertainty_type == "asymmetrical": - hist_1_high = hist_1.copy() - hist_1_high[:] = np.c_[hist_1_high.values(), uncertainties_high ** 2] - hist_1_low = hist_1.copy() - hist_1_low[:] = np.c_[hist_1_low.values(), uncertainties_low ** 2] + if h1_uncertainty_type == "asymmetrical": + h1_high = h1.copy() + h1_high[:] = np.c_[h1_high.values(), uncertainties_high ** 2] + h1_low = h1.copy() + h1_low[:] = np.c_[h1_low.values(), uncertainties_low ** 2] comparison_uncertainties_low = np.sqrt( - get_ratio_variances(hist_1_low, hist_2) + get_ratio_variances(h1_low, h2) ) comparison_uncertainties_high = np.sqrt( - get_ratio_variances(hist_1_high, hist_2) + get_ratio_variances(h1_high, h2) ) else: - comparison_uncertainties_low = np.sqrt(get_ratio_variances(hist_1, hist_2)) + comparison_uncertainties_low = np.sqrt(get_ratio_variances(h1, h2)) comparison_uncertainties_high = comparison_uncertainties_low elif ratio_uncertainty_type == "split": - if hist_1_uncertainty_type == "asymmetrical": - comparison_uncertainties_low = uncertainties_low / hist_2.values() - comparison_uncertainties_high = uncertainties_high / hist_2.values() + if h1_uncertainty_type == "asymmetrical": + comparison_uncertainties_low = uncertainties_low / h2.values() + comparison_uncertainties_high = uncertainties_high / h2.values() else: h1_scaled_uncertainties = np.where( - hist_2.values() != 0, - np.sqrt(hist_1.variances()) / hist_2.values(), + h2.values() != 0, + np.sqrt(h1.variances()) / h2.values(), np.nan, ) comparison_uncertainties_low = h1_scaled_uncertainties @@ -344,27 +346,28 @@ def get_ratio( def get_comparison( - hist_1, - hist_2, + h1, + h2, comparison, + h1_uncertainty_type="symmetrical", ratio_uncertainty_type="uncorrelated", - hist_1_uncertainty_type="symmetrical", ): """ Compute the comparison between two histograms. Parameters ---------- - hist_1 : boost_histogram.Histogram + h1 : boost_histogram.Histogram The first histogram for comparison. - hist_2 : boost_histogram.Histogram + h2 : boost_histogram.Histogram The second histogram for comparison. comparison : str The type of comparison ("ratio", "pull", "difference", "relative_difference" or "asymmetry"). + h1_uncertainty_type : str, optional + What kind of bin uncertainty to use for h1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical". ratio_uncertainty_type : str, optional - How to treat the uncertainties of the histograms when comparison is "ratio" or "relative_difference" ("uncorrelated" for simple comparison, "split" for scaling and split hist_1 and hist_2 uncertainties). This argument has no effect if comparison != "ratio" or "relative_difference". Default is "uncorrelated". - hist_1_uncertainty_type : str, optional - What kind of bin uncertainty to use for hist_1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical". + How to treat the uncertainties of the histograms when comparison is "ratio" or "relative_difference" ("uncorrelated" for simple comparison, "split" for scaling and split h1 and h2 uncertainties). This argument has no effect if comparison != "ratio" or "relative_difference". Default is "uncorrelated". + Returns ------- values : numpy.ndarray @@ -374,34 +377,34 @@ def get_comparison( upper_uncertainties : numpy.ndarray The upper uncertainties on the comparison values. """ - _check_uncertainty_type(hist_1_uncertainty_type) - _check_binning_consistency([hist_1, hist_2]) + _check_uncertainty_type(h1_uncertainty_type) + _check_binning_consistency([h1, h2]) np.seterr(divide="ignore", invalid="ignore") if comparison == "ratio": values, lower_uncertainties, upper_uncertainties = get_ratio( - hist_1, hist_2, ratio_uncertainty_type, hist_1_uncertainty_type + h1, h2, h1_uncertainty_type, ratio_uncertainty_type ) elif comparison == "relative_difference": values, lower_uncertainties, upper_uncertainties = get_ratio( - hist_1, hist_2, ratio_uncertainty_type, hist_1_uncertainty_type + h1, h2, h1_uncertainty_type, ratio_uncertainty_type ) values -= 1 # relative difference is ratio-1 elif comparison == "pull": values, lower_uncertainties, upper_uncertainties = get_pull( - hist_1, hist_2, hist_1_uncertainty_type + h1, h2, h1_uncertainty_type ) elif comparison == "difference": values, lower_uncertainties, upper_uncertainties = get_difference( - hist_1, hist_2, hist_1_uncertainty_type + h1, h2, h1_uncertainty_type ) elif comparison == "asymmetry": - if hist_1_uncertainty_type == "asymmetrical": + if h1_uncertainty_type == "asymmetrical": raise ValueError( "Asymmetrical uncertainties are not supported for the asymmetry comparison." ) - values, lower_uncertainties, upper_uncertainties = get_asymmetry(hist_1, hist_2) + values, lower_uncertainties, upper_uncertainties = get_asymmetry(h1, h2) else: raise ValueError( f"{comparison} not available as a comparison ('ratio', 'pull', 'difference', 'relative_difference' or 'asymmetry')." diff --git a/plothist/get_dummy_data.py b/plothist/get_dummy_data.py index 35cd2db5..12bdb0c3 100644 --- a/plothist/get_dummy_data.py +++ b/plothist/get_dummy_data.py @@ -3,5 +3,13 @@ def get_dummy_data(): + """ + Get dummy data for plotting examples. + + Returns + ------- + pandas.DataFrame + Dummy data. + """ with resources_path("plothist", "dummy_data.csv") as dummy_data: return pd.read_csv(dummy_data) diff --git a/plothist/plothist_style.py b/plothist/plothist_style.py index 0f06980e..4ada86f3 100644 --- a/plothist/plothist_style.py +++ b/plothist/plothist_style.py @@ -10,12 +10,12 @@ def set_style(style="default"): """ - Set the matplotlib style. + Set the plothist style. Parameters ---------- style : str, optional - Switch between different styles. Default is 'default'. More style might come in the futur. + Switch between different styles. Default is 'default'. More style might come in the future. Returns ------- diff --git a/plothist/plotters.py b/plothist/plotters.py index 73f77507..e136d90b 100644 --- a/plothist/plotters.py +++ b/plothist/plotters.py @@ -13,7 +13,6 @@ get_asymmetrical_uncertainties, _check_binning_consistency, _check_uncertainty_type, - _is_unweighted, ) from plothist.histogramming import _make_hist_from_function from plothist.plothist_style import set_fitting_ylabel_fontsize @@ -206,6 +205,9 @@ def plot_function(func, range, ax, stacked=False, npoints=1000, **kwargs): **kwargs, ) else: + if kwargs.get("labels", None) is None: + kwargs["labels"] = [] + if not isinstance(func, list): func = [func] n_collections_before = len(list(ax.collections)) @@ -415,8 +417,8 @@ def plot_hist_uncertainties(hist, ax, **kwargs): def plot_two_hist_comparison( - hist_1, - hist_2, + h1, + h2, xlabel=None, ylabel=None, h1_label="h1", @@ -431,9 +433,9 @@ def plot_two_hist_comparison( Parameters ---------- - hist_1 : boost_histogram.Histogram + h1 : boost_histogram.Histogram The first histogram to compare. - hist_2 : boost_histogram.Histogram + h2 : boost_histogram.Histogram The second histogram to compare. xlabel : str, optional The label for the x-axis. Default is None. @@ -467,7 +469,7 @@ def plot_two_hist_comparison( """ - _check_binning_consistency([hist_1, hist_2]) + _check_binning_consistency([h1, h2]) if fig is None and ax_main is None and ax_comparison is None: fig, (ax_main, ax_comparison) = create_comparison_figure() @@ -476,18 +478,18 @@ def plot_two_hist_comparison( "Need to provid fig, ax_main and ax_comparison (or none of them)." ) - xlim = (hist_1.axes[0].edges[0], hist_1.axes[0].edges[-1]) + xlim = (h1.axes[0].edges[0], h1.axes[0].edges[-1]) - plot_hist(hist_1, ax=ax_main, label=h1_label, histtype="step") - plot_hist(hist_2, ax=ax_main, label=h2_label, histtype="step") + plot_hist(h1, ax=ax_main, label=h1_label, histtype="step") + plot_hist(h2, ax=ax_main, label=h2_label, histtype="step") ax_main.set_xlim(xlim) ax_main.set_ylabel(ylabel) ax_main.legend() _ = ax_main.xaxis.set_ticklabels([]) plot_comparison( - hist_1, - hist_2, + h1, + h2, ax_comparison, xlabel=xlabel, h1_label=h1_label, @@ -501,8 +503,8 @@ def plot_two_hist_comparison( def plot_comparison( - hist_1, - hist_2, + h1, + h2, ax, xlabel="", h1_label="h1", @@ -510,8 +512,8 @@ def plot_comparison( comparison="ratio", comparison_ylabel=None, comparison_ylim=None, + h1_uncertainty_type="symmetrical", ratio_uncertainty_type="uncorrelated", - hist_1_uncertainty_type="symmetrical", **plot_hist_kwargs, ): """ @@ -519,9 +521,9 @@ def plot_comparison( Parameters ---------- - hist_1 : boost_histogram.Histogram + h1 : boost_histogram.Histogram The first histogram for comparison. - hist_2 : boost_histogram.Histogram + h2 : boost_histogram.Histogram The second histogram for comparison. ax : matplotlib.axes.Axes The axes to plot the comparison. @@ -537,10 +539,10 @@ def plot_comparison( The label for the y-axis. Default is the explicit formula used to compute the comparison plot. comparison_ylim : tuple or None, optional The y-axis limits for the comparison plot. Default is None. If None, standard y-axis limits are setup. + h1_uncertainty_type : str, optional + What kind of bin uncertainty to use for h1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical". ratio_uncertainty_type : str, optional - How to treat the uncertainties of the histograms when comparison is "ratio" or "relative_difference" ("uncorrelated" for simple comparison, "split" for scaling and split hist_1 and hist_2 uncertainties). This argument has no effect if comparison != "ratio" or "relative_difference". Default is "uncorrelated". - hist_1_uncertainty_type : str, optional - What kind of bin uncertainty to use for hist_1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical". + How to treat the uncertainties of the histograms when comparison is "ratio" or "relative_difference" ("uncorrelated" for simple comparison, "split" for scaling and split h1 and h2 uncertainties). This argument has no effect if comparison != "ratio" or "relative_difference". Default is "uncorrelated". **plot_hist_kwargs : optional Arguments to be passed to plot_hist() or plot_error_hist(), called in case the comparison is "pull" or "ratio", respectively. In case of pull, the default arguments are histtype="stepfilled" and color="darkgrey". In case of ratio, the default argument is color="black". @@ -558,18 +560,18 @@ def plot_comparison( h1_label = _get_math_text(h1_label) h2_label = _get_math_text(h2_label) - _check_binning_consistency([hist_1, hist_2]) + _check_binning_consistency([h1, h2]) comparison_values, lower_uncertainties, upper_uncertainties = get_comparison( - hist_1, hist_2, comparison, ratio_uncertainty_type, hist_1_uncertainty_type + h1, h2, comparison, h1_uncertainty_type, ratio_uncertainty_type ) if np.allclose(lower_uncertainties, upper_uncertainties, equal_nan=True): - hist_comparison = bh.Histogram(hist_2.axes[0], storage=bh.storage.Weight()) + hist_comparison = bh.Histogram(h2.axes[0], storage=bh.storage.Weight()) hist_comparison[:] = np.c_[comparison_values, lower_uncertainties**2] else: plot_hist_kwargs.setdefault("yerr", [lower_uncertainties, upper_uncertainties]) - hist_comparison = bh.Histogram(hist_2.axes[0], storage=bh.storage.Weight()) + hist_comparison = bh.Histogram(h2.axes[0], storage=bh.storage.Weight()) hist_comparison[:] = np.c_[comparison_values, np.zeros_like(comparison_values)] if comparison == "pull": @@ -601,13 +603,13 @@ def plot_comparison( if ratio_uncertainty_type == "split": np.seterr(divide="ignore", invalid="ignore") h2_scaled_uncertainties = np.where( - hist_2.values() != 0, - np.sqrt(hist_2.variances()) / hist_2.values(), + h2.values() != 0, + np.sqrt(h2.variances()) / h2.values(), np.nan, ) np.seterr(divide="warn", invalid="warn") ax.bar( - x=hist_2.axes[0].centers, + x=h2.axes[0].centers, bottom=np.nan_to_num( bottom_shift - h2_scaled_uncertainties, nan=comparison_ylim[0] ), @@ -615,7 +617,7 @@ def plot_comparison( 2 * h2_scaled_uncertainties, nan=comparison_ylim[-1] - comparison_ylim[0], ), - width=hist_2.axes[0].widths, + width=h2.axes[0].widths, edgecolor="dimgrey", hatch="////", fill=False, @@ -640,7 +642,7 @@ def plot_comparison( ax.axhline(0, ls="--", lw=1.0, color="black") ax.set_ylabel(rf"$\frac{{{h1_label} - {h2_label}}}{{{h1_label} + {h2_label}}}$") - xlim = (hist_1.axes[0].edges[0], hist_1.axes[0].edges[-1]) + xlim = (h1.axes[0].edges[0], h1.axes[0].edges[-1]) ax.set_xlim(xlim) ax.set_xlabel(xlabel) if comparison_ylim is not None: @@ -1117,7 +1119,7 @@ def plot_data_model_comparison( model_hist, ax=ax_comparison, xlabel=xlabel, - hist_1_uncertainty_type=data_uncertainty_type, + h1_uncertainty_type=data_uncertainty_type, **comparison_kwargs, ) diff --git a/plothist/variable_registry.py b/plothist/variable_registry.py index 547f331c..6e1088a9 100644 --- a/plothist/variable_registry.py +++ b/plothist/variable_registry.py @@ -134,14 +134,14 @@ def create_variable_registry( _save_variable_registry(variable_registry, path=path) -def get_variable_from_registry(variable, path="./variable_registry.yaml"): +def get_variable_from_registry(variable_key, path="./variable_registry.yaml"): """ This function retrieves the parameter information for a variable from the variable registry file specified by the 'path' parameter. It loads the variable registry file and returns the dictionary entry corresponding to the specified variable name. Parameters ---------- - variable : str + variable_key : str The name of the variable for which to retrieve parameter information. path : str, optional The path to the variable registry file (default is "./variable_registry.yaml"). @@ -160,7 +160,7 @@ def get_variable_from_registry(variable, path="./variable_registry.yaml"): with open(path, "r") as f: variable_registry = yaml.safe_load(f) - return variable_registry[variable] + return variable_registry[variable_key] def update_variable_registry( @@ -250,8 +250,8 @@ def update_variable_registry_ranges( Parameters ---------- - data : dict - A dictionary containing the data for the variables. + data : dict or pandas.DataFrame + A dataset containing the data for the variables. variable_keys : list A list of variable keys for which to update the range parameters in the registry. The variable needs to have a bin and range properties in the registry. Default is None: all variables in the registry are updated. path : str, optional