From 8259bce9a681866a031ed6d9cf4d16736e4d4d55 Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Wed, 17 Apr 2024 23:54:55 -0500
Subject: [PATCH 1/3] fix: Cast non-hashable types to floats

---
 src/pyhf/infer/intervals/upper_limits.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/pyhf/infer/intervals/upper_limits.py b/src/pyhf/infer/intervals/upper_limits.py
index 601189cdbb..249bb435fa 100644
--- a/src/pyhf/infer/intervals/upper_limits.py
+++ b/src/pyhf/infer/intervals/upper_limits.py
@@ -92,6 +92,10 @@ def f(poi, level, limit=0):
         # Use integers for limit so we don't need a string comparison
         # limit == 0: Observed
         # else: expected
+
+        # Arrays are not hashable, so cast poi to float before using it as a cache key
+        poi = float(poi)
+
         return (
             f_cached(poi)[0] - level
             if limit == 0

From bd10ae3bcf783b4018d8763f5504b1305663ae8d Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Thu, 18 Apr 2024 01:39:43 -0500
Subject: [PATCH 2/3] convert to floats to be able to cache

---
 src/pyhf/infer/intervals/upper_limits.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/pyhf/infer/intervals/upper_limits.py b/src/pyhf/infer/intervals/upper_limits.py
index 249bb435fa..8f0e41382a 100644
--- a/src/pyhf/infer/intervals/upper_limits.py
+++ b/src/pyhf/infer/intervals/upper_limits.py
@@ -15,7 +15,8 @@ def __dir__():
 
 def _interp(x, xp, fp):
     tb, _ = get_backend()
-    return tb.astensor(np.interp(x, xp.tolist(), fp.tolist()))
+    # xp has already been turned into a list at this point
+    return tb.astensor(np.interp(x, xp, fp.tolist()))
 
 
 def toms748_scan(
@@ -79,13 +80,19 @@ def toms748_scan(
 
     def f_cached(poi):
         if poi not in cache:
-            cache[poi] = hypotest(
+            # FIXME: scipy.optimize.toms748 still operates on floats,
+            # not on any form of ndarray, so we want every value in the
+            # cache to be a float.
+            # This may change with the Python array API standard
+            # in the future.
+            cls_obs, cls_exp_band = hypotest(
                 poi,
                 data,
                 model,
                 return_expected_set=True,
                 **hypotest_kwargs,
             )
+            cache[poi] = (float(cls_obs), [float(x) for x in cls_exp_band])
         return cache[poi]
 
     def f(poi, level, limit=0):
@@ -198,7 +205,9 @@ def linear_grid_scan(
     obs = tb.astensor([[r[0]] for r in results])
     exp = tb.astensor([[r[1][idx] for idx in range(5)] for r in results])
 
-    result_array = tb.concatenate([obs, exp], axis=1).T
+    # TODO: Can use `.T` after TensorFlow support is removed.
+    result_array = tb.transpose(tb.concatenate([obs, exp], axis=1))
+    result_array = tb.tolist(result_array)
 
     # observed limit and the (0, +-1, +-2)sigma expected limits
     limits = [_interp(level, result_array[idx][::-1], scan[::-1]) for idx in range(6)]

From e815612898ccbcd890b108127e7135f853fe2f4e Mon Sep 17 00:00:00 2001
From: Matthew Feickert <matthew.feickert@cern.ch>
Date: Thu, 18 Apr 2024 01:40:31 -0500
Subject: [PATCH 3/3] test: Add backend support to testing

* Adding TensorFlow tests is very slow
---
 tests/test_infer.py | 64 ++++++++++++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 24 deletions(-)

diff --git a/tests/test_infer.py b/tests/test_infer.py
index 0ccd072b94..596a982118 100644
--- a/tests/test_infer.py
+++ b/tests/test_infer.py
@@ -23,7 +23,7 @@ def check_uniform_type(in_list):
     )
 
 
-def test_toms748_scan(tmp_path, hypotest_args):
+def test_toms748_scan(backend, tmp_path, hypotest_args):
     """
     Test the upper limit toms748 scan returns the correct structure and values
     """
@@ -53,11 +53,12 @@ def test_toms748_scan(tmp_path, hypotest_args):
             for i in range(5)
         ]
     )
-    assert observed_cls == pytest.approx(0.05)
+    # FIXME: Remove float cast after TensorFlow support removed
+    assert float(observed_cls) == pytest.approx(0.05)
     assert expected_cls == pytest.approx(0.05)
 
 
-def test_toms748_scan_bounds_extension(hypotest_args):
+def test_toms748_scan_bounds_extension(backend, hypotest_args):
     """
     Test the upper limit toms748 scan bounds can correctly extend to bracket the CLs level
     """
@@ -72,18 +73,20 @@ def test_toms748_scan_bounds_extension(hypotest_args):
         data, model, 3, 5, rtol=1e-8
     )
 
-    assert observed_limit == pytest.approx(observed_limit_default)
+    # FIXME: Remove float cast after TensorFlow support removed
+    assert float(observed_limit) == pytest.approx(observed_limit_default)
     assert np.allclose(np.asarray(expected_limits), np.asarray(expected_limits_default))
 
     # Force bounds_up to expand
     observed_limit, expected_limits = pyhf.infer.intervals.upper_limits.toms748_scan(
         data, model, 0, 1, rtol=1e-8
     )
-    assert observed_limit == pytest.approx(observed_limit_default)
+    # FIXME: Remove float cast after TensorFlow support removed
+    assert float(observed_limit) == pytest.approx(observed_limit_default)
     assert np.allclose(np.asarray(expected_limits), np.asarray(expected_limits_default))
 
 
-def test_upper_limit_against_auto(hypotest_args):
+def test_upper_limit_against_auto(backend, hypotest_args):
     """
     Test upper_limit linear scan and toms748_scan return similar results
     """
@@ -97,11 +100,13 @@ def test_upper_limit_against_auto(hypotest_args):
     )
     obs_linear, exp_linear = results_linear
     # Can't expect these to be much closer given the low granularity of the linear scan
-    assert obs_auto == pytest.approx(obs_linear, abs=0.1)
+    # FIXME: Remove float cast after TensorFlow support removed
+    assert float(obs_auto) == pytest.approx(obs_linear, abs=0.1)
     assert np.allclose(exp_auto, exp_linear, atol=0.1)
 
 
-def test_upper_limit(hypotest_args):
+@pytest.mark.skip_numpy_minuit
+def test_upper_limit(backend, hypotest_args):
     """
     Check that the default return structure of pyhf.infer.hypotest is as expected
     """
@@ -110,22 +115,27 @@ def test_upper_limit(hypotest_args):
     results = pyhf.infer.intervals.upper_limits.upper_limit(data, model, scan=scan)
     assert len(results) == 2
     observed_limit, expected_limits = results
-    assert observed_limit == pytest.approx(1.0262704738584554)
-    assert expected_limits == pytest.approx(
-        [0.65765653, 0.87999725, 1.12453992, 1.50243428, 2.09232927]
+    # FIXME: Remove float cast after TensorFlow support removed
+    assert float(observed_limit) == pytest.approx(1.0262704738584554)
+    # FIXME: Can use expected_limits == pytest.approx([...]) after TensorFlow support removed
+    assert np.allclose(
+        expected_limits, [0.65765653, 0.87999725, 1.12453992, 1.50243428, 2.09232927]
     )
 
     # tighter relative tolerance needed for macos
     results = pyhf.infer.intervals.upper_limits.upper_limit(data, model, rtol=1e-6)
     assert len(results) == 2
     observed_limit, expected_limits = results
-    assert observed_limit == pytest.approx(1.01156939)
-    assert expected_limits == pytest.approx(
-        [0.55988001, 0.75702336, 1.06234693, 1.50116923, 2.05078596]
+    # FIXME: Remove float cast after TensorFlow support removed
+    assert float(observed_limit) == pytest.approx(1.01156939)
+    # FIXME: Can use expected_limits == pytest.approx([...]) after TensorFlow support removed
+    assert np.allclose(
+        expected_limits, [0.55988001, 0.75702336, 1.06234693, 1.50116923, 2.05078596]
     )
 
 
-def test_upper_limit_with_kwargs(hypotest_args):
+@pytest.mark.skip_numpy_minuit
+def test_upper_limit_with_kwargs(backend, hypotest_args):
     """
     Check that the default return structure of pyhf.infer.hypotest is as expected
     """
@@ -136,9 +146,11 @@ def test_upper_limit_with_kwargs(hypotest_args):
     )
     assert len(results) == 2
     observed_limit, expected_limits = results
-    assert observed_limit == pytest.approx(1.0262704738584554)
-    assert expected_limits == pytest.approx(
-        [0.65765653, 0.87999725, 1.12453992, 1.50243428, 2.09232927]
+    # FIXME: Remove float cast after TensorFlow support removed
+    assert float(observed_limit) == pytest.approx(1.0262704738584554)
+    # FIXME: Can use expected_limits == pytest.approx([...]) after TensorFlow support removed
+    assert np.allclose(
+        expected_limits, [0.65765653, 0.87999725, 1.12453992, 1.50243428, 2.09232927]
     )
 
     # linear_grid_scan
@@ -147,9 +159,11 @@ def test_upper_limit_with_kwargs(hypotest_args):
     )
     assert len(results) == 3
     observed_limit, expected_limits, (_scan, point_results) = results
-    assert observed_limit == pytest.approx(1.0262704738584554)
-    assert expected_limits == pytest.approx(
-        [0.65765653, 0.87999725, 1.12453992, 1.50243428, 2.09232927]
+    # FIXME: Remove float cast after TensorFlow support removed
+    assert float(observed_limit) == pytest.approx(1.0262704738584554)
+    # FIXME: Can use expected_limits == pytest.approx([...]) after TensorFlow support removed
+    assert np.allclose(
+        expected_limits, [0.65765653, 0.87999725, 1.12453992, 1.50243428, 2.09232927]
     )
     assert _scan.tolist() == scan.tolist()
     assert len(_scan) == len(point_results)
@@ -160,9 +174,11 @@ def test_upper_limit_with_kwargs(hypotest_args):
     )
     assert len(results) == 3
     observed_limit, expected_limits, (_scan, point_results) = results
-    assert observed_limit == pytest.approx(1.01156939)
-    assert expected_limits == pytest.approx(
-        [0.55988001, 0.75702336, 1.06234693, 1.50116923, 2.05078596]
+    # FIXME: Remove float cast after TensorFlow support removed
+    assert float(observed_limit) == pytest.approx(1.01156939)
+    # FIXME: Can use expected_limits == pytest.approx([...]) after TensorFlow support removed
+    assert np.allclose(
+        expected_limits, [0.55988001, 0.75702336, 1.06234693, 1.50116923, 2.05078596]
     )