Skip to content

Commit

Permalink
fix: saturated likelihood evaluation with custom auxiliary data (#378)
Browse files Browse the repository at this point in the history
* fix: correctly evaluate the constraint term for the saturated likelihood with best-fit parameters
* updated pre-commit
  • Loading branch information
alexander-held authored Nov 24, 2022
1 parent bb30815 commit 87a452b
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 9 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ repos:
hooks:
- id: black
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.982
rev: v0.991
hooks:
- id: mypy
name: mypy with Python 3.8
Expand All @@ -22,12 +22,12 @@ repos:
- id: flake8
additional_dependencies: [flake8-bugbear, flake8-import-order, flake8-print]
- repo: https://github.com/asottile/pyupgrade
rev: v3.1.0
rev: v3.2.2
hooks:
- id: pyupgrade
args: ["--py37-plus"]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
rev: v4.4.0
hooks:
- id: check-added-large-files
args: ["--maxkb=100"]
Expand Down
14 changes: 10 additions & 4 deletions src/cabinetry/fit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,8 @@ def _goodness_of_fit(
"""Calculates goodness-of-fit p-value with a saturated model.
Returns NaN if the number of degrees of freedom in the chi2 test is zero (nominal
fit should already be perfect) or negative (over-parameterized model).
fit should already be perfect) or negative (over-parameterized model). Does not
require to run an additional fit to get the saturated model likelihood.
Args:
model (pyhf.pdf.Model): model used in the fit for which goodness-of-fit should
Expand All @@ -380,11 +381,16 @@ def _goodness_of_fit(
"""
if model.config.nauxdata > 0:
main_data, aux_data = model.fullpdf_tv.split(pyhf.tensorlib.astensor(data))

# need to obtain the parameters that maximize the constraint term
# (will not match suggested_init() when auxdata is custom)
best_pars = model_utils._parameters_maximizing_constraint_term(
model, pyhf.tensorlib.tolist(aux_data)
)

# constraint term: log Gaussian(aux_data|parameters) etc.
constraint_ll = pyhf.tensorlib.to_numpy(
model.constraint_logpdf(
aux_data, pyhf.tensorlib.astensor(model.config.suggested_init())
)
model.constraint_logpdf(aux_data, pyhf.tensorlib.astensor(best_pars))
)
else:
# no auxiliary data, so no constraint terms present
Expand Down
55 changes: 55 additions & 0 deletions src/cabinetry/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,3 +703,58 @@ def _modifier_map(
(channel["name"], sample["name"], modifier["name"])
].append(modifier["type"])
return modifier_map


def _parameters_maximizing_constraint_term(
    model: pyhf.pdf.Model, aux_data: List[float]
) -> List[float]:
    """Returns parameters maximizing the constraint term for the given auxiliary data.

    Parameters without an associated constraint term are set to their initial value.

    Args:
        model (pyhf.pdf.Model): model to use for constraint term evaluation
        aux_data (List[float]): auxiliary data for which the constraint term should be
            maximized

    Returns:
        List[float]: parameters maximizing the model constraint term
    """
    try:
        # factors to rescale Poisson auxiliary data (index [0] for batch_size)
        poisson_factors = model.constraint_model.constraints_poisson.batched_factors[0]
    except AttributeError:
        poisson_factors = None  # model has no Poisson-constrained terms

    pars = []  # parameters maximizing the constraint term
    aux_pos = 0  # position within the flattened auxiliary data
    poisson_pos = 0  # position within the list of Poisson rescale factors

    for par_name in model.config.par_order:
        par_set = model.config.param_set(par_name)
        if par_name not in model.config.auxdata_order:
            # no associated auxdata (free-floating parameter): use init value;
            # float conversion needed, see https://github.com/scikit-hep/pyhf/issues/2065
            pars.extend(
                float(init) for init in pyhf.tensorlib.tolist(par_set.suggested_init)
            )
            continue

        # parameter has associated auxdata: derive its value from the matching
        # auxdata slice (a parameter may control multiple bins)
        n_bins = par_set.n_parameters
        if par_set.pdf_type == "poisson":
            # auxiliary data for Poisson constraint terms needs rescaling
            scales = [
                float(factor)
                for factor in poisson_factors[poisson_pos : poisson_pos + n_bins]
            ]
            poisson_pos += n_bins
        else:
            scales = [1.0] * n_bins  # no rescaling for other constraint types

        pars.extend(np.asarray(aux_data[aux_pos : aux_pos + n_bins]) / scales)
        aux_pos += n_bins
    return pars
72 changes: 72 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,78 @@ def example_spec_lumi():
return spec


@pytest.fixture
def example_spec_modifiers():
    # workspace with a single two-bin channel whose sample carries one modifier
    # of every pyhf type (normfactor, shapefactor, normsys, histosys, staterror,
    # shapesys, lumi)
    modifiers = [
        {"data": None, "name": "mu", "type": "normfactor"},
        {"data": None, "name": "mu_shapefactor", "type": "shapefactor"},
        {"data": {"hi": 0.8, "lo": 1.2}, "name": "normsys", "type": "normsys"},
        {
            "data": {"hi_data": [36.0, 31.0], "lo_data": [34.0, 29.0]},
            "name": "histosys",
            "type": "histosys",
        },
        {"data": [1.0, 2.0], "name": "staterror_SR", "type": "staterror"},
        {"data": [1.0, 2.0], "name": "shapesys_SR", "type": "shapesys"},
        {"data": None, "name": "lumi", "type": "lumi"},
    ]
    # lumi modifier requires an explicit parameter configuration
    lumi_config = {
        "auxdata": [1.0],
        "inits": [1.0],
        "name": "lumi",
        "sigmas": [0.02],
    }
    spec = {
        "channels": [
            {
                "name": "SR",
                "samples": [
                    {
                        "data": [35.0, 30.0],
                        "modifiers": modifiers,
                        "name": "Signal",
                    }
                ],
            }
        ],
        "measurements": [
            {
                "config": {"parameters": [lumi_config], "poi": "mu"},
                "name": "lumi modifier",
            }
        ],
        "observations": [{"data": [35, 30], "name": "SR"}],
        "version": "1.0.0",
    }
    return spec


# code below allows marking tests as slow and adds --runslow to run them
# implemented following https://docs.pytest.org/en/6.2.x/example/simple.html
def pytest_addoption(parser):
Expand Down
25 changes: 23 additions & 2 deletions tests/fit/test_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,24 +323,45 @@ def func_to_minimize(pars):


@mock.patch("cabinetry.model_utils.unconstrained_parameter_count", return_value=1)
@mock.patch(
"cabinetry.model_utils._parameters_maximizing_constraint_term",
side_effect=[
[1.0, 1.0, 1.0, 1.0], # for example_spec_multibin
[1.0, 0.9, 1.1, 0.8], # for custom aux
[1.0], # for example_spec_no_aux
],
)
def test__goodness_of_fit(
mock_count, example_spec_multibin, example_spec_no_aux, caplog
mock_pars, mock_count, example_spec_multibin, example_spec_no_aux, caplog
):
caplog.set_level(logging.DEBUG)

model, data = model_utils.model_and_data(example_spec_multibin)
p_val = fit._goodness_of_fit(model, data, 9.964913)
assert mock_pars.call_count == 1
assert mock_pars.call_args[0][0].spec == model.spec
assert np.allclose(mock_pars.call_args[0][1], [1.0, 1.0, 1.0])
assert mock_pars.call_args[1] == {}
assert mock_count.call_count == 1
assert mock_count.call_args[0][0].spec == model.spec
assert mock_count.call_args[1] == {}
assert "Delta NLL = 0.084185" in [rec.message for rec in caplog.records]
assert np.allclose(p_val, 0.91926079)
caplog.clear()

# same setup but using custom auxdata
model, _ = model_utils.model_and_data(example_spec_multibin)
data = [35, 8, 10] + [0.9, 1.1, 0.8] # custom aux
p_val = fit._goodness_of_fit(model, data, 9.964913)
assert mock_pars.call_count == 2
assert np.allclose(mock_pars.call_args[0][1], [0.9, 1.1, 0.8]) # aux picked up
assert np.allclose(p_val, 0.91926079) # same result as before

# no auxdata and zero degrees of freedom in chi2 test
model, data = model_utils.model_and_data(example_spec_no_aux)
p_val = fit._goodness_of_fit(model, data, 6.01482863)
assert mock_count.call_count == 2
assert mock_pars.call_count == 2 # no new call (no auxdata)
assert mock_count.call_count == 3
assert mock_count.call_args[0][0].spec == model.spec
assert mock_count.call_args[1] == {}
assert (
Expand Down
23 changes: 23 additions & 0 deletions tests/test_model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,3 +515,26 @@ def test_modifier_map(example_spec):
("Signal Region", "Signal", "Signal strength"): ["normfactor"],
}
assert modifier_map[("a", "b", "c")] == []


def test__parameters_maximizing_constraint_term(
    example_spec, example_spec_no_aux, example_spec_modifiers
):
    # (spec, auxdata, expected parameters) scenarios:
    # 1) staterror with a custom auxdata value
    # 2) model without any auxiliary data
    # 3) all modifier types at once (including Poisson constraint for shapesys),
    #    custom auxdata values; parameter order: histosys, lumi, normfactor,
    #    normsys, shapefactor (2 bins), shapesys (2 bins), staterror (2 bins)
    scenarios = [
        (example_spec, [1.1], [1.0, 1.1]),
        (example_spec_no_aux, [], [1.0]),
        (
            example_spec_modifiers,
            [0.1, 1.1, 0.2, 1531.25, 281.25, 0.9, 0.8],
            [0.1, 1.1, 1.0, 0.2, 1.0, 1.0, 1.25, 1.25, 0.9, 0.8],
        ),
    ]
    for spec, aux_data, expected in scenarios:
        model = pyhf.Workspace(spec).model()
        result = model_utils._parameters_maximizing_constraint_term(model, aux_data)
        assert np.allclose(result, expected)

0 comments on commit 87a452b

Please sign in to comment.