diff --git a/causal_testing/surrogate/surrogate_search_algorithms.py b/causal_testing/surrogate/surrogate_search_algorithms.py
index 71e5d655..94984b6a 100644
--- a/causal_testing/surrogate/surrogate_search_algorithms.py
+++ b/causal_testing/surrogate/surrogate_search_algorithms.py
@@ -35,8 +35,8 @@ def search(
         # The GA fitness function after including required variables into the function's scope
         # Unused arguments are required for pygad's fitness function signature
-        #pylint: disable=cell-var-from-loop
-        def fitness_function(ga, solution, idx): # pylint: disable=unused-argument
+        # pylint: disable=cell-var-from-loop
+        def fitness_function(ga, solution, idx):  # pylint: disable=unused-argument
             surrogate.control_value = solution[0] - self.delta
             surrogate.treatment_value = solution[0] + self.delta
 
@@ -45,8 +45,10 @@ def fitness_function(ga, solution, idx): # pylint: disable=unused-argument
                 adjustment_dict[adjustment] = solution[i + 1]
 
             ate = surrogate.estimate_ate_calculated(adjustment_dict)
-
-            return contradiction_function(ate)
+            if len(ate) > 1:
+                raise ValueError(
+                    "Multiple ate values provided but currently only single values supported in this method")
+            return contradiction_function(ate[0])
 
         gene_types, gene_space = self.create_gene_types(surrogate, specification)
 
@@ -82,7 +84,7 @@ def fitness_function(ga, solution, idx): # pylint: disable=unused-argument
 
     @staticmethod
     def create_gene_types(
-        surrogate_model: CubicSplineRegressionEstimator, specification: CausalSpecification
+            surrogate_model: CubicSplineRegressionEstimator, specification: CausalSpecification
     ) -> tuple[list, list]:
         """Generate the gene_types and gene_space for a given fitness function and specification
         :param surrogate_model: Instance of a CubicSplineRegressionEstimator
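Reviewer note: the new guard assumes estimate_ate_calculated now returns a single-valued pandas Series (see the estimators.py changes below). A minimal standalone sketch of the unwrapping behaviour; contradiction_function here is a placeholder for illustration, not the project's implementation:

    import pandas as pd

    def contradiction_function(ate: float) -> float:
        return abs(ate)  # placeholder for illustration only

    ate = pd.Series([4.2])  # the shape estimate_ate_calculated now returns
    if len(ate) > 1:
        raise ValueError("Multiple ate values provided but currently only single values supported in this method")
    fitness = contradiction_function(ate[0])  # unwrap the single value for pygad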
diff --git a/causal_testing/testing/causal_test_outcome.py b/causal_testing/testing/causal_test_outcome.py
index f045828c..0c3ae1e4 100644
--- a/causal_testing/testing/causal_test_outcome.py
+++ b/causal_testing/testing/causal_test_outcome.py
@@ -27,14 +27,13 @@ class SomeEffect(CausalTestOutcome):
     """An extension of TestOutcome representing that the expected causal effect should not be zero."""
 
     def apply(self, res: CausalTestResult) -> bool:
-        if res.test_value.type == "ate":
-            return (0 < res.ci_low() < res.ci_high()) or (res.ci_low() < res.ci_high() < 0)
-        if res.test_value.type == "coefficient":
-            ci_low = res.ci_low() if isinstance(res.ci_low(), Iterable) else [res.ci_low()]
-            ci_high = res.ci_high() if isinstance(res.ci_high(), Iterable) else [res.ci_high()]
-            return any(0 < ci_low < ci_high or ci_low < ci_high < 0 for ci_low, ci_high in zip(ci_low, ci_high))
         if res.test_value.type == "risk_ratio":
-            return (1 < res.ci_low() < res.ci_high()) or (res.ci_low() < res.ci_high() < 1)
+            return any(
+                1 < ci_low < ci_high or ci_low < ci_high < 1 for ci_low, ci_high in zip(res.ci_low(), res.ci_high()))
+        if res.test_value.type in ('coefficient', 'ate'):
+            return any(
+                0 < ci_low < ci_high or ci_low < ci_high < 0 for ci_low, ci_high in zip(res.ci_low(), res.ci_high()))
+        raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
@@ -51,23 +50,20 @@ def __init__(self, atol: float = 1e-10, ctol: float = 0.05):
         self.ctol = ctol
 
     def apply(self, res: CausalTestResult) -> bool:
-        if res.test_value.type == "ate":
-            return (res.ci_low() < 0 < res.ci_high()) or (abs(res.test_value.value) < self.atol)
-        if res.test_value.type == "coefficient":
-            ci_low = res.ci_low() if isinstance(res.ci_low(), Iterable) else [res.ci_low()]
-            ci_high = res.ci_high() if isinstance(res.ci_high(), Iterable) else [res.ci_high()]
+        if res.test_value.type == "risk_ratio":
+            return any(ci_low < 1 < ci_high or np.isclose(value, 1.0, atol=self.atol) for ci_low, ci_high, value in
+                       zip(res.ci_low(), res.ci_high(), res.test_value.value))
+        if res.test_value.type in ('coefficient', 'ate'):
             value = res.test_value.value if isinstance(res.ci_high(), Iterable) else [res.test_value.value]
-        return (
-            sum(
-                not ((ci_low < 0 < ci_high) or abs(v) < self.atol)
-                for ci_low, ci_high, v in zip(ci_low, ci_high, value)
-            )
-            / len(value)
-            < self.ctol
-        )
-        if res.test_value.type == "risk_ratio":
-            return (res.ci_low() < 1 < res.ci_high()) or np.isclose(res.test_value.value, 1.0, atol=self.atol)
+            return (
+                sum(
+                    not ((ci_low < 0 < ci_high) or abs(v) < self.atol)
+                    for ci_low, ci_high, v in zip(res.ci_low(), res.ci_high(), value)
+                )
+                / len(value)
+                < self.ctol
+            )
+        raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
@@ -93,28 +89,33 @@ def __str__(self):
 
 
 class Positive(SomeEffect):
-    """An extension of TestOutcome representing that the expected causal effect should be positive."""
+    """An extension of TestOutcome representing that the expected causal effect should be positive.
+    Currently only single values are supported for the test value"""
 
     def apply(self, res: CausalTestResult) -> bool:
         if res.ci_valid() and not super().apply(res):
            return False
+        if len(res.test_value.value) > 1:
+            raise ValueError("Positive Effects are currently only supported on single float datatypes")
         if res.test_value.type in {"ate", "coefficient"}:
-            return bool(res.test_value.value > 0)
+            return bool(res.test_value.value[0] > 0)
         if res.test_value.type == "risk_ratio":
-            return bool(res.test_value.value > 1)
-        # Dead code but necessary for pylint
+            return bool(res.test_value.value[0] > 1)
         raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
 
 
 class Negative(SomeEffect):
-    """An extension of TestOutcome representing that the expected causal effect should be negative."""
+    """An extension of TestOutcome representing that the expected causal effect should be negative.
+    Currently only single values are supported for the test value"""
 
     def apply(self, res: CausalTestResult) -> bool:
         if res.ci_valid() and not super().apply(res):
             return False
+        if len(res.test_value.value) > 1:
+            raise ValueError("Negative Effects are currently only supported on single float datatypes")
         if res.test_value.type in {"ate", "coefficient"}:
-            return bool(res.test_value.value < 0)
+            return bool(res.test_value.value[0] < 0)
         if res.test_value.type == "risk_ratio":
-            return bool(res.test_value.value < 1)
+            return bool(res.test_value.value[0] < 1)
         # Dead code but necessary for pylint
         raise ValueError(f"Test Value type {res.test_value.type} is not valid for this TestOutcome")
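Reviewer note: a minimal sketch of how the reworked outcome classes consume Series-valued results. The constructor signatures are the ones used elsewhere in this diff; the toy estimator and data are assumptions for illustration only:

    import pandas as pd
    from causal_testing.testing.causal_test_outcome import SomeEffect, NoEffect
    from causal_testing.testing.causal_test_result import CausalTestResult, TestValue
    from causal_testing.testing.estimators import LinearRegressionEstimator

    # Toy estimator; arguments follow the (treatment, treatment_value, control_value,
    # adjustment_set, outcome, ...) order used in the tests below.
    estimator = LinearRegressionEstimator("X1", 1, 0, {"X2"}, "Y",
                                          df=pd.DataFrame({"X1": [0, 1], "X2": [1, 0], "Y": [0, 2]}))
    # Two coefficients: the first CI excludes zero, the second straddles it.
    ctr = CausalTestResult(
        estimator=estimator,
        test_value=TestValue(type="coefficient", value=pd.Series([5.05, 0.02])),
        confidence_intervals=[pd.Series([4.8, -0.1]), pd.Series([6.7, 0.1])],
        effect_modifier_configuration=None,
    )
    SomeEffect().apply(ctr)  # True: at least one confidence interval excludes zero
    NoEffect().apply(ctr)    # False: half the coefficients show an effect, which exceeds ctol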
diff --git a/causal_testing/testing/causal_test_result.py b/causal_testing/testing/causal_test_result.py
index 7963afd1..afae6195 100644
--- a/causal_testing/testing/causal_test_result.py
+++ b/causal_testing/testing/causal_test_result.py
@@ -27,7 +27,7 @@ def __init__(
         self,
         estimator: Estimator,
         test_value: TestValue,
-        confidence_intervals: [float, float] = None,
+        confidence_intervals: [pd.Series, pd.Series] = None,
         effect_modifier_configuration: {Variable: Any} = None,
         adequacy=None,
     ):
@@ -99,12 +99,16 @@ def to_dict(self, json=False):
     def ci_low(self):
         """Return the lower bracket of the confidence intervals."""
         if self.confidence_intervals:
+            if isinstance(self.confidence_intervals[0], pd.Series):
+                return self.confidence_intervals[0].to_list()
             return self.confidence_intervals[0]
         return None
 
     def ci_high(self):
         """Return the higher bracket of the confidence intervals."""
         if self.confidence_intervals:
+            if isinstance(self.confidence_intervals[1], pd.Series):
+                return self.confidence_intervals[1].to_list()
             return self.confidence_intervals[1]
         return None
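Reviewer note: ci_low()/ci_high() now normalise a pd.Series bound to a plain list, so the zips in the outcome classes above iterate uniformly. Illustrative values only:

    import pandas as pd

    confidence_intervals = [pd.Series([2.6]), pd.Series([4.3])]  # [low, high]
    ci_low = confidence_intervals[0].to_list()   # [2.6], what ci_low() now returns
    ci_high = confidence_intervals[1].to_list()  # [4.3], what ci_high() now returns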
diff --git a/causal_testing/testing/estimators.py b/causal_testing/testing/estimators.py
index e3996a0d..895124bf 100644
--- a/causal_testing/testing/estimators.py
+++ b/causal_testing/testing/estimators.py
@@ -11,7 +11,7 @@
 import statsmodels.formula.api as smf
 from econml.dml import CausalForestDML
 from patsy import dmatrix  # pylint: disable = no-name-in-module
-
+from patsy import ModelDesc
 from sklearn.ensemble import GradientBoostingRegressor
 from statsmodels.regression.linear_model import RegressionResultsWrapper
 from statsmodels.tools.sm_exceptions import PerfectSeparationError
@@ -343,7 +343,7 @@ def add_modelling_assumptions(self):
             "do not need to be linear."
         )
 
-    def estimate_coefficient(self) -> float:
+    def estimate_coefficient(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         """Estimate the unit average treatment effect of the treatment on the outcome. That is, the change in outcome
         caused by a unit change in treatment.
 
@@ -351,22 +351,20 @@ def estimate_coefficient(self) -> float:
         """
         model = self._run_linear_regression()
         newline = "\n"
-        treatment = [self.treatment]
-        if str(self.df.dtypes[self.treatment]) == "object":
+        patsy_md = ModelDesc.from_formula(self.treatment)
+        if any((self.df.dtypes[factor.name()] == 'object' for factor in patsy_md.rhs_termlist[1].factors)):
             design_info = dmatrix(self.formula.split("~")[1], self.df).design_info
             treatment = design_info.column_names[design_info.term_name_slices[self.treatment]]
+        else:
+            treatment = [self.treatment]
         assert set(treatment).issubset(
             model.params.index.tolist()
         ), f"{treatment} not in\n{' ' + str(model.params.index).replace(newline, newline + ' ')}"
         unit_effect = model.params[treatment]  # Unit effect is the coefficient of the treatment
         [ci_low, ci_high] = self._get_confidence_intervals(model, treatment)
-        if str(self.df.dtypes[self.treatment]) != "object":
-            unit_effect = unit_effect[0]
-            ci_low = ci_low[0]
-            ci_high = ci_high[0]
         return unit_effect, [ci_low, ci_high]
 
-    def estimate_ate(self) -> tuple[float, list[float, float], float]:
+    def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         """Estimate the average treatment effect of the treatment on the outcome. That is, the change in outcome
         caused by changing the treatment variable from the control value to the treatment value.
 
@@ -384,8 +382,9 @@ def estimate_ate(self) -> tuple[float, list[float, float], float]:
 
         # Perform a t-test to compare the predicted outcome of the control and treated individual (ATE)
         t_test_results = model.t_test(individuals.loc["treated"] - individuals.loc["control"])
-        ate = t_test_results.effect[0]
+        ate = pd.Series(t_test_results.effect[0])
         confidence_intervals = list(t_test_results.conf_int(alpha=self.alpha).flatten())
+        confidence_intervals = [pd.Series(interval) for interval in confidence_intervals]
         return ate, confidence_intervals
 
     def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd.Series, pd.Series]:
@@ -414,7 +413,7 @@ def estimate_control_treatment(self, adjustment_config: dict = None) -> tuple[pd
 
         return y.iloc[1], y.iloc[0]
 
-    def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[float, list[float, float]]:
+    def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         """Estimate the risk_ratio effect of the treatment on the outcome. That is, the change in outcome caused by
         changing the treatment variable from the control value to the treatment value.
@@ -423,12 +422,11 @@ def estimate_risk_ratio(self, adjustment_config: dict = None) -> tuple[float, li
         if adjustment_config is None:
             adjustment_config = {}
         control_outcome, treatment_outcome = self.estimate_control_treatment(adjustment_config=adjustment_config)
-        ci_low = treatment_outcome["mean_ci_lower"] / control_outcome["mean_ci_upper"]
-        ci_high = treatment_outcome["mean_ci_upper"] / control_outcome["mean_ci_lower"]
-
-        return (treatment_outcome["mean"] / control_outcome["mean"]), [ci_low, ci_high]
+        ci_low = pd.Series(treatment_outcome["mean_ci_lower"] / control_outcome["mean_ci_upper"])
+        ci_high = pd.Series(treatment_outcome["mean_ci_upper"] / control_outcome["mean_ci_lower"])
+        return pd.Series(treatment_outcome["mean"] / control_outcome["mean"]), [ci_low, ci_high]
 
-    def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[float, list[float, float]]:
+    def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         """Estimate the ate effect of the treatment on the outcome. That is, the change in outcome caused by changing
         the treatment variable from the control value to the treatment value. Here, we actually calculate the
         expected outcomes under control and treatment and divide one by the other. This
@@ -439,10 +437,9 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> tuple[float
         if adjustment_config is None:
             adjustment_config = {}
         control_outcome, treatment_outcome = self.estimate_control_treatment(adjustment_config=adjustment_config)
-        ci_low = treatment_outcome["mean_ci_lower"] - control_outcome["mean_ci_upper"]
-        ci_high = treatment_outcome["mean_ci_upper"] - control_outcome["mean_ci_lower"]
-
-        return (treatment_outcome["mean"] - control_outcome["mean"]), [ci_low, ci_high]
+        ci_low = pd.Series(treatment_outcome["mean_ci_lower"] - control_outcome["mean_ci_upper"])
+        ci_high = pd.Series(treatment_outcome["mean_ci_upper"] - control_outcome["mean_ci_lower"])
+        return pd.Series(treatment_outcome["mean"] - control_outcome["mean"]), [ci_low, ci_high]
 
     def _run_linear_regression(self) -> RegressionResultsWrapper:
         """Run linear regression of the treatment and adjustment set against the outcome and return the model.
@@ -456,8 +453,8 @@ def _run_linear_regression(self) -> RegressionResultsWrapper:
     def _get_confidence_intervals(self, model, treatment):
         confidence_intervals = model.conf_int(alpha=self.alpha, cols=None)
         ci_low, ci_high = (
-            confidence_intervals[0].loc[treatment],
-            confidence_intervals[1].loc[treatment],
+            pd.Series(confidence_intervals[0].loc[treatment]),
+            pd.Series(confidence_intervals[1].loc[treatment]),
         )
         return [ci_low, ci_high]
 
@@ -495,7 +492,7 @@ def __init__(
         terms = [treatment] + sorted(list(adjustment_set)) + sorted(list(effect_modifiers))
         self.formula = f"{outcome} ~ cr({'+'.join(terms)}, df={basis})"
 
-    def estimate_ate_calculated(self, adjustment_config: dict = None) -> float:
+    def estimate_ate_calculated(self, adjustment_config: dict = None) -> pd.Series:
         model = self._run_linear_regression()
 
         x = {"Intercept": 1, self.treatment: self.treatment_value}
@@ -511,7 +508,7 @@ def estimate_ate_calculated(self, adjustment_config: dict = None) -> float:
         x[self.treatment] = self.control_value
         control = model.predict(x).iloc[0]
 
-        return treatment - control
+        return pd.Series(treatment - control)
 
 
 class InstrumentalVariableEstimator(Estimator):
@@ -567,7 +564,7 @@ def add_modelling_assumptions(self):
         """
         )
 
-    def estimate_iv_coefficient(self, df):
+    def estimate_iv_coefficient(self, df) -> float:
         """
         Estimate the linear regression coefficient of the treatment on the outcome.
         """
@@ -581,7 +578,7 @@ def estimate_iv_coefficient(self, df):
         # Estimate the coefficient of I on X by cancelling
         return ab / a
 
-    def estimate_coefficient(self, bootstrap_size=100):
+    def estimate_coefficient(self, bootstrap_size=100) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         """
         Estimate the unit ate (i.e. coefficient) of the treatment on the outcome.
         """
@@ -590,10 +587,10 @@ def estimate_coefficient(self, bootstrap_size=100):
             [self.estimate_iv_coefficient(self.df.sample(len(self.df), replace=True)) for _ in range(bootstrap_size)]
         )
         bound = ceil((bootstrap_size * self.alpha) / 2)
-        ci_low = bootstraps[bound]
-        ci_high = bootstraps[bootstrap_size - bound]
+        ci_low = pd.Series(bootstraps[bound])
+        ci_high = pd.Series(bootstraps[bootstrap_size - bound])
 
-        return self.estimate_iv_coefficient(self.df), (ci_low, ci_high)
+        return pd.Series(self.estimate_iv_coefficient(self.df)), [ci_low, ci_high]
 
 
 class CausalForestEstimator(Estimator):
@@ -610,7 +607,7 @@ def add_modelling_assumptions(self):
         """
         self.modelling_assumptions.append("Non-parametric estimator: no restrictions imposed on the data.")
 
-    def estimate_ate(self) -> float:
+    def estimate_ate(self) -> tuple[pd.Series, list[pd.Series, pd.Series]]:
         """Estimate the average treatment effect.
 
         :return ate, confidence_intervals: The average treatment effect and 95% confidence intervals.
@@ -638,9 +635,9 @@ def estimate_ate(self) -> float:
         model.fit(outcome_df, treatment_df, X=effect_modifier_df, W=confounders_df)
 
         # Obtain the ATE and 95% confidence intervals
-        ate = model.ate(effect_modifier_df, T0=self.control_value, T1=self.treatment_value)
+        ate = pd.Series(model.ate(effect_modifier_df, T0=self.control_value, T1=self.treatment_value))
         ate_interval = model.ate_interval(effect_modifier_df, T0=self.control_value, T1=self.treatment_value)
-        ci_low, ci_high = ate_interval[0], ate_interval[1]
+        ci_low, ci_high = pd.Series(ate_interval[0]), pd.Series(ate_interval[1])
         return ate, [ci_low, ci_high]
 
     def estimate_cates(self) -> pd.DataFrame:
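Reviewer note: after this change every effect estimate is Series-valued, which is why the example scripts and tests below unwrap results with [0]. A sketch of the new return shape, with illustrative numbers rather than real estimates:

    import pandas as pd

    # estimate_ate() and friends now return (pd.Series, [pd.Series, pd.Series])
    ate, [ci_low, ci_high] = pd.Series([3.5]), [pd.Series([2.6]), pd.Series([4.3])]
    assert round(ate[0], 1) == 3.5
    assert [round(ci_low[0], 1), round(ci_high[0], 1)] == [2.6, 4.3]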
f"{{{after_underscore}}}" latex_compatible_treatment_str = rf"${before_underscore}_{after_underscore_braces}$" - cis_low = [c[0] for c in cis] - cis_high = [c[1] for c in cis] + cis_low = [c[0][0] for c in cis] + cis_high = [c[1][0] for c in cis] axes.fill_between( xs, cis_low, cis_high, alpha=0.2, color=input_colors[treatment], label=latex_compatible_treatment_str ) diff --git a/examples/lr91/example_max_conductances_test_suite.py b/examples/lr91/example_max_conductances_test_suite.py index c704777f..fe9c1e2c 100644 --- a/examples/lr91/example_max_conductances_test_suite.py +++ b/examples/lr91/example_max_conductances_test_suite.py @@ -166,8 +166,8 @@ def plot_ates_with_cis(results_dict: dict, xs: list, save: bool = False, show=Fa before_underscore, after_underscore = treatment.split("_") after_underscore_braces = f"{{{after_underscore}}}" latex_compatible_treatment_str = rf"${before_underscore}_{after_underscore_braces}$" - cis_low = [c[0] for c in cis] - cis_high = [c[1] for c in cis] + cis_low = [c[0][0] for c in cis] + cis_high = [c[1][0] for c in cis] axes.fill_between( xs, cis_low, cis_high, alpha=0.2, color=input_colors[treatment], label=latex_compatible_treatment_str ) diff --git a/examples/poisson-line-process/example_poisson_process.py b/examples/poisson-line-process/example_poisson_process.py index ae8d07e0..820c3537 100644 --- a/examples/poisson-line-process/example_poisson_process.py +++ b/examples/poisson-line-process/example_poisson_process.py @@ -198,8 +198,8 @@ def test_poisson_width_num_shapes(save=False): "treatment": treatment_value, "intensity": i, "ate": causal_test_result.test_value.value, - "ci_low": min(causal_test_result.confidence_intervals), - "ci_high": max(causal_test_result.confidence_intervals), + "ci_low": causal_test_result.confidence_intervals[0][0], + "ci_high": causal_test_result.confidence_intervals[1][0], } width_num_shapes_results.append(results) width_num_shapes_results = pd.DataFrame(width_num_shapes_results) diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py index 2e2ab52e..433aeb64 100644 --- a/tests/testing_tests/test_causal_test_case.py +++ b/tests/testing_tests/test_causal_test_case.py @@ -118,7 +118,7 @@ def test_execute_test_observational_causal_forest_estimator(self): self.df, ) causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector) - self.assertAlmostEqual(causal_test_result.test_value.value, 4, delta=1) + pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1) def test_invalid_causal_effect(self): """Check that executing the causal test case returns the correct results for dummy data using a linear @@ -140,7 +140,7 @@ def test_execute_test_observational_linear_regression_estimator(self): self.df, ) causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector) - self.assertAlmostEqual(causal_test_result.test_value.value, 4, delta=1e-10) + pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1e-10) def test_execute_test_observational_linear_regression_estimator_direct_effect(self): """Check that executing the causal test case returns the correct results for dummy data using a linear @@ -167,7 +167,7 @@ def test_execute_test_observational_linear_regression_estimator_direct_effect(se self.df, ) causal_test_result = causal_test_case.execute_test(estimation_model, self.data_collector) - self.assertAlmostEqual(causal_test_result.test_value.value, 4, 
diff --git a/examples/lr91/example_max_conductances.py b/examples/lr91/example_max_conductances.py
index d73ae6f8..bef94f04 100644
--- a/examples/lr91/example_max_conductances.py
+++ b/examples/lr91/example_max_conductances.py
@@ -164,8 +164,8 @@ def plot_ates_with_cis(results_dict: dict, xs: list, save: bool = False, show: b
         before_underscore, after_underscore = treatment.split("_")
         after_underscore_braces = f"{{{after_underscore}}}"
         latex_compatible_treatment_str = rf"${before_underscore}_{after_underscore_braces}$"
-        cis_low = [c[0] for c in cis]
-        cis_high = [c[1] for c in cis]
+        cis_low = [c[0][0] for c in cis]
+        cis_high = [c[1][0] for c in cis]
         axes.fill_between(
             xs, cis_low, cis_high, alpha=0.2, color=input_colors[treatment], label=latex_compatible_treatment_str
         )
diff --git a/examples/lr91/example_max_conductances_test_suite.py b/examples/lr91/example_max_conductances_test_suite.py
index c704777f..fe9c1e2c 100644
--- a/examples/lr91/example_max_conductances_test_suite.py
+++ b/examples/lr91/example_max_conductances_test_suite.py
@@ -166,8 +166,8 @@ def plot_ates_with_cis(results_dict: dict, xs: list, save: bool = False, show=Fa
         before_underscore, after_underscore = treatment.split("_")
         after_underscore_braces = f"{{{after_underscore}}}"
         latex_compatible_treatment_str = rf"${before_underscore}_{after_underscore_braces}$"
-        cis_low = [c[0] for c in cis]
-        cis_high = [c[1] for c in cis]
+        cis_low = [c[0][0] for c in cis]
+        cis_high = [c[1][0] for c in cis]
         axes.fill_between(
             xs, cis_low, cis_high, alpha=0.2, color=input_colors[treatment], label=latex_compatible_treatment_str
         )
diff --git a/examples/poisson-line-process/example_poisson_process.py b/examples/poisson-line-process/example_poisson_process.py
index ae8d07e0..820c3537 100644
--- a/examples/poisson-line-process/example_poisson_process.py
+++ b/examples/poisson-line-process/example_poisson_process.py
@@ -198,8 +198,8 @@ def test_poisson_width_num_shapes(save=False):
                 "treatment": treatment_value,
                 "intensity": i,
                 "ate": causal_test_result.test_value.value,
-                "ci_low": min(causal_test_result.confidence_intervals),
-                "ci_high": max(causal_test_result.confidence_intervals),
+                "ci_low": causal_test_result.confidence_intervals[0][0],
+                "ci_high": causal_test_result.confidence_intervals[1][0],
             }
             width_num_shapes_results.append(results)
     width_num_shapes_results = pd.DataFrame(width_num_shapes_results)
diff --git a/tests/testing_tests/test_causal_test_case.py b/tests/testing_tests/test_causal_test_case.py
index 2e2ab52e..433aeb64 100644
--- a/tests/testing_tests/test_causal_test_case.py
+++ b/tests/testing_tests/test_causal_test_case.py
@@ -118,7 +118,7 @@ def test_execute_test_observational_causal_forest_estimator(self):
             self.df,
         )
         causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertAlmostEqual(causal_test_result.test_value.value, 4, delta=1)
+        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1)
 
     def test_invalid_causal_effect(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -140,7 +140,7 @@ def test_execute_test_observational_linear_regression_estimator(self):
             self.df,
         )
         causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertAlmostEqual(causal_test_result.test_value.value, 4, delta=1e-10)
+        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1e-10)
 
     def test_execute_test_observational_linear_regression_estimator_direct_effect(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -167,7 +167,7 @@ def test_execute_test_observational_linear_regression_estimator_direct_effect(se
             self.df,
         )
         causal_test_result = causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertAlmostEqual(causal_test_result.test_value.value, 4, delta=1e-10)
+        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1e-10)
 
     def test_execute_test_observational_linear_regression_estimator_coefficient(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -182,7 +182,7 @@ def test_execute_test_observational_linear_regression_estimator_coefficient(self
         )
         self.causal_test_case.estimate_type = "coefficient"
         causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertEqual(int(causal_test_result.test_value.value), 0)
+        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series({'D': 0.0}), atol=1e-1)
 
     def test_execute_test_observational_linear_regression_estimator_risk_ratio(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -197,7 +197,7 @@ def test_execute_test_observational_linear_regression_estimator_risk_ratio(self)
         )
         self.causal_test_case.estimate_type = "risk_ratio"
         causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertEqual(int(causal_test_result.test_value.value), 0)
+        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(0.0), atol=1)
 
     def test_invalid_estimate_type(self):
         """Check that executing the causal test case returns the correct results for dummy data using a linear
@@ -227,7 +227,7 @@ def test_execute_test_observational_linear_regression_estimator_squared_term(sel
             formula=f"C ~ A + {'+'.join(self.minimal_adjustment_set)} + (D ** 2)",
         )
         causal_test_result = self.causal_test_case.execute_test(estimation_model, self.data_collector)
-        self.assertAlmostEqual(round(causal_test_result.test_value.value, 1), 4, delta=1)
+        pd.testing.assert_series_equal(causal_test_result.test_value.value, pd.Series(4.0), atol=1)
 
     def test_execute_observational_causal_forest_estimator_cates(self):
         """Check that executing the causal test case returns the correct conditional average treatment effects for
diff --git a/tests/testing_tests/test_causal_test_outcome.py b/tests/testing_tests/test_causal_test_outcome.py
index a2b0173d..235cc724 100644
--- a/tests/testing_tests/test_causal_test_outcome.py
+++ b/tests/testing_tests/test_causal_test_outcome.py
@@ -1,4 +1,5 @@
 import unittest
+import pandas as pd
 from causal_testing.testing.causal_test_outcome import ExactValue, SomeEffect, Positive, Negative, NoEffect
 from causal_testing.testing.causal_test_result import CausalTestResult, TestValue
 from causal_testing.testing.estimators import LinearRegressionEstimator
@@ -69,7 +70,7 @@ def test_empty_adjustment_set(self):
         )
 
     def test_Positive_ate_pass(self):
-        test_value = TestValue(type="ate", value=5.05)
+        test_value = TestValue(type="ate", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
@@ -80,7 +81,7 @@ def test_Positive_ate_pass(self):
         self.assertTrue(ev.apply(ctr))
 
     def test_Positive_risk_ratio_pass(self):
-        test_value = TestValue(type="risk_ratio", value=2)
+        test_value = TestValue(type="risk_ratio", value=pd.Series(2))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
@@ -91,7 +92,7 @@ def test_Positive_risk_ratio_pass(self):
         self.assertTrue(ev.apply(ctr))
 
     def test_Positive_fail(self):
-        test_value = TestValue(type="ate", value=0)
+        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
@@ -102,18 +103,18 @@ def test_Positive_fail(self):
         self.assertFalse(ev.apply(ctr))
 
     def test_Positive_fail_ci(self):
-        test_value = TestValue(type="ate", value=0)
+        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[-1, 1],
+            confidence_intervals=[pd.Series(-1), pd.Series(1)],
             effect_modifier_configuration=None,
         )
         ev = Positive()
         self.assertFalse(ev.apply(ctr))
 
     def test_Negative_ate_pass(self):
-        test_value = TestValue(type="ate", value=-5.05)
+        test_value = TestValue(type="ate", value=pd.Series(-5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
@@ -124,7 +125,7 @@ def test_Negative_ate_pass(self):
         self.assertTrue(ev.apply(ctr))
 
     def test_Negative_risk_ratio_pass(self):
-        test_value = TestValue(type="risk_ratio", value=0.2)
+        test_value = TestValue(type="risk_ratio", value=pd.Series(0.2))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
@@ -135,7 +136,7 @@ def test_Negative_risk_ratio_pass(self):
         self.assertTrue(ev.apply(ctr))
 
     def test_Negative_fail(self):
-        test_value = TestValue(type="ate", value=0)
+        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
@@ -146,18 +147,18 @@ def test_Negative_fail(self):
         self.assertFalse(ev.apply(ctr))
 
     def test_Negative_fail_ci(self):
-        test_value = TestValue(type="ate", value=0)
+        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[-1, 1],
+            confidence_intervals=[pd.Series(-1), pd.Series(1)],
             effect_modifier_configuration=None,
         )
         ev = Negative()
         self.assertFalse(ev.apply(ctr))
 
     def test_exactValue_pass(self):
-        test_value = TestValue(type="ate", value=5.05)
+        test_value = TestValue(type="ate", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
@@ -168,18 +169,18 @@ def test_exactValue_pass(self):
         self.assertTrue(ev.apply(ctr))
 
     def test_exactValue_pass_ci(self):
-        test_value = TestValue(type="ate", value=5.05)
+        test_value = TestValue(type="ate", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[4, 6],
+            confidence_intervals=[pd.Series(4), pd.Series(6)],
             effect_modifier_configuration=None,
         )
         ev = ExactValue(5, 0.1)
         self.assertTrue(ev.apply(ctr))
 
     def test_exactValue_fail(self):
-        test_value = TestValue(type="ate", value=0)
+        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
@@ -194,11 +195,11 @@ def test_invalid_atol(self):
             ExactValue(5, -0.1)
 
     def test_invalid(self):
-        test_value = TestValue(type="invalid", value=5.05)
+        test_value = TestValue(type="invalid", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[4.8, 6.7],
+            confidence_intervals=[pd.Series(4.8), pd.Series(6.7)],
             effect_modifier_configuration=None,
         )
         with self.assertRaises(ValueError):
@@ -211,44 +212,44 @@ def test_invalid(self):
             Negative().apply(ctr)
 
     def test_someEffect_pass_coefficient(self):
-        test_value = TestValue(type="coefficient", value=5.05)
+        test_value = TestValue(type="coefficient", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[4.8, 6.7],
+            confidence_intervals=[pd.Series(4.8), pd.Series(6.7)],
             effect_modifier_configuration=None,
         )
         self.assertTrue(SomeEffect().apply(ctr))
         self.assertFalse(NoEffect().apply(ctr))
 
     def test_someEffect_pass_ate(self):
-        test_value = TestValue(type="ate", value=5.05)
+        test_value = TestValue(type="ate", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[4.8, 6.7],
+            confidence_intervals=[pd.Series(4.8), pd.Series(6.7)],
             effect_modifier_configuration=None,
         )
         self.assertTrue(SomeEffect().apply(ctr))
         self.assertFalse(NoEffect().apply(ctr))
 
     def test_someEffect_pass_rr(self):
-        test_value = TestValue(type="risk_ratio", value=5.05)
+        test_value = TestValue(type="risk_ratio", value=pd.Series(5.05))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[4.8, 6.7],
+            confidence_intervals=[pd.Series(4.8), pd.Series(6.7)],
             effect_modifier_configuration=None,
         )
         self.assertTrue(SomeEffect().apply(ctr))
         self.assertFalse(NoEffect().apply(ctr))
 
     def test_someEffect_fail(self):
-        test_value = TestValue(type="ate", value=0)
+        test_value = TestValue(type="ate", value=pd.Series(0))
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[-0.1, 0.2],
+            confidence_intervals=[pd.Series(-0.1), pd.Series(0.2)],
             effect_modifier_configuration=None,
         )
         self.assertFalse(SomeEffect().apply(ctr))
@@ -259,7 +260,7 @@ def test_someEffect_str(self):
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[-0.1, 0.2],
+            confidence_intervals=[pd.Series(-0.1), pd.Series(0.2)],
             effect_modifier_configuration=None,
         )
         ev = SomeEffect()
@@ -273,8 +274,8 @@ def test_someEffect_str(self):
                 "adjustment_set": set(),
                 "effect_estimate": 0,
                 "effect_measure": "ate",
-                "ci_low": -0.1,
-                "ci_high": 0.2,
+                "ci_low": [-0.1],
+                "ci_high": [0.2],
             },
         )
 
@@ -283,7 +284,7 @@ def test_someEffect_dict(self):
         ctr = CausalTestResult(
             estimator=self.estimator,
             test_value=test_value,
-            confidence_intervals=[-0.1, 0.2],
+            confidence_intervals=[pd.Series(-0.1), pd.Series(0.2)],
             effect_modifier_configuration=None,
         )
         ev = SomeEffect()
@@ -297,8 +298,8 @@ def test_someEffect_dict(self):
                 "adjustment_set": set(),
                 "effect_estimate": 0,
                 "effect_measure": "ate",
-                "ci_low": -0.1,
-                "ci_high": 0.2,
+                "ci_low": [-0.1],
+                "ci_high": [0.2],
             },
         )
 
@@ -321,3 +322,16 @@ def test_negative_risk_ratio_e_value_using_ci(self):
         cv = CausalValidator()
         e_value = cv.estimate_e_value_using_ci(0.8, [0.2, 0.9])
         self.assertEqual(round(e_value, 4), 1.4625)
+
+    def test_multiple_value_exception_caught(self):
+        test_value = TestValue(type="ate", value=pd.Series([0, 1]))
+        ctr = CausalTestResult(
+            estimator=self.estimator,
+            test_value=test_value,
+            confidence_intervals=[None, None],
+            effect_modifier_configuration=None,
+        )
+        with self.assertRaises(ValueError):
+            Positive().apply(ctr)
+        with self.assertRaises(ValueError):
+            Negative().apply(ctr)
diff --git a/tests/testing_tests/test_causal_test_suite.py b/tests/testing_tests/test_causal_test_suite.py
index b3d0f448..1fd28bb3 100644
--- a/tests/testing_tests/test_causal_test_suite.py
+++ b/tests/testing_tests/test_causal_test_suite.py
@@ -98,7 +98,7 @@ def test_execute_test_suite_single_base_test_case(self):
         causal_test_results = self.test_suite.execute_test_suite(self.data_collector, self.causal_specification)
         causal_test_case_result = causal_test_results[self.base_test_case]
-        self.assertAlmostEqual(causal_test_case_result["LinearRegressionEstimator"][0].test_value.value, 4, delta=1e-10)
+        self.assertAlmostEqual(causal_test_case_result["LinearRegressionEstimator"][0].test_value.value[0], 4, delta=1e-10)
 
     def test_execute_test_suite_multiple_estimators(self):
         """Check that executing a test suite with multiple estimators returns correct results for the dummy data
@@ -114,5 +114,5 @@ def test_execute_test_suite_multiple_estimators(self):
         causal_test_case_result = causal_test_results[self.base_test_case]
         linear_regression_result = causal_test_case_result["LinearRegressionEstimator"][0]
         causal_forrest_result = causal_test_case_result["CausalForestEstimator"][0]
-        self.assertAlmostEqual(linear_regression_result.test_value.value, 4, delta=1e-1)
-        self.assertAlmostEqual(causal_forrest_result.test_value.value, 4, delta=1e-1)
+        self.assertAlmostEqual(linear_regression_result.test_value.value[0], 4, delta=1e-1)
+        self.assertAlmostEqual(causal_forrest_result.test_value.value[0], 4, delta=1e-1)
diff --git a/tests/testing_tests/test_estimators.py b/tests/testing_tests/test_estimators.py
index 7811fab8..e8ac8d28 100644
--- a/tests/testing_tests/test_estimators.py
+++ b/tests/testing_tests/test_estimators.py
@@ -185,7 +185,7 @@ def test_estimate_coefficient(self):
             instrument="Z",
         )
         coefficient, [low, high] = iv_estimator.estimate_coefficient()
-        self.assertEqual(coefficient, 2)
+        self.assertEqual(coefficient[0], 2)
 
 
 class TestLinearRegressionEstimator(unittest.TestCase):
@@ -217,7 +217,7 @@ def test_program_11_2(self):
         self.assertEqual(round(model.params["Intercept"] + 90 * model.params["treatments"], 1), 216.9)
 
         # Increasing treatments from 90 to 100 should be the same as 10 times the unit ATE
-        self.assertEqual(round(model.params["treatments"], 1), round(ate, 1))
+        self.assertTrue(all(round(model.params["treatments"], 1) == round(ate_single, 1) for ate_single in ate))
 
     def test_program_11_3(self):
         """Test whether our linear regression implementation produces the same results as program 11.3 (p. 144)."""
@@ -237,7 +237,7 @@ def test_program_11_3(self):
             197.1,
         )
         # Increasing treatments from 90 to 100 should be the same as 10 times the unit ATE
-        self.assertEqual(round(model.params["treatments"], 3), round(ate, 3))
+        self.assertTrue(all(round(model.params["treatments"], 3) == round(ate_single, 3) for ate_single in ate))
 
     def test_program_15_1A(self):
         """Test whether our linear regression implementation produces the same results as program 15.1 (p. 163, 184)."""
@@ -315,8 +315,9 @@ def test_program_15_no_interaction(self):
         # terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
         # for term_to_square in terms_to_square:
         ate, [ci_low, ci_high] = linear_regression_estimator.estimate_coefficient()
-        self.assertEqual(round(ate, 1), 3.5)
-        self.assertEqual([round(ci_low, 1), round(ci_high, 1)], [2.6, 4.3])
+
+        self.assertEqual(round(ate[0], 1), 3.5)
+        self.assertEqual([round(ci_low[0], 1), round(ci_high[0], 1)], [2.6, 4.3])
 
     def test_program_15_no_interaction_ate(self):
         """Test whether our linear regression implementation produces the same results as program 15.1 (p. 163, 184)
@@ -350,8 +351,8 @@ def test_program_15_no_interaction_ate(self):
         # terms_to_square = ["age", "wt71", "smokeintensity", "smokeyrs"]
         # for term_to_square in terms_to_square:
         ate, [ci_low, ci_high] = linear_regression_estimator.estimate_ate()
-        self.assertEqual(round(ate, 1), 3.5)
-        self.assertEqual([round(ci_low, 1), round(ci_high, 1)], [2.6, 4.3])
+        self.assertEqual(round(ate[0], 1), 3.5)
+        self.assertEqual([round(ci_low[0], 1), round(ci_high[0], 1)], [2.6, 4.3])
 
     def test_program_15_no_interaction_ate_calculated(self):
         """Test whether our linear regression implementation produces the same results as program 15.1 (p. 163, 184)
@@ -388,8 +389,8 @@ def test_program_15_no_interaction_ate_calculated(self):
         ate, [ci_low, ci_high] = linear_regression_estimator.estimate_ate_calculated(
             adjustment_config={k: self.nhefs_df.mean()[k] for k in covariates}
         )
-        self.assertEqual(round(ate, 1), 3.5)
-        self.assertEqual([round(ci_low, 1), round(ci_high, 1)], [1.9, 5])
+        self.assertEqual(round(ate[0], 1), 3.5)
+        self.assertEqual([round(ci_low[0], 1), round(ci_high[0], 1)], [1.9, 5])
 
     def test_program_11_2_with_robustness_validation(self):
         """Test whether our linear regression estimator, as used in test_program_11_2 can correctly estimate robustness."""
@@ -433,8 +434,8 @@ def test_program_11_3_cublic_spline(self):
         ate_2 = cublic_spline_estimator.estimate_ate_calculated()
 
         # Doubling the treatemebnt value should roughly but not exactly double the ATE
-        self.assertNotEqual(ate_1 * 2, ate_2)
-        self.assertAlmostEqual(ate_1 * 2, ate_2)
+        self.assertNotEqual(ate_1[0] * 2, ate_2[0])
+        self.assertAlmostEqual(ate_1[0] * 2, ate_2[0])
 
 
 class TestCausalForestEstimator(unittest.TestCase):
@@ -470,8 +471,8 @@ def test_program_15_ate(self):
         }
         causal_forest = CausalForestEstimator("qsmk", 1, 0, covariates, "wt82_71", df, {"smokeintensity": 40})
         ate, _ = causal_forest.estimate_ate()
-        self.assertGreater(round(ate, 1), 2.5)
-        self.assertLess(round(ate, 1), 4.5)
+        self.assertGreater(round(ate[0], 1), 2.5)
+        self.assertLess(round(ate[0], 1), 4.5)
 
     def test_program_15_cate(self):
         """Test whether our causal forest implementation produces the similar CATE to program 15.1 (p. 163, 184)."""
@@ -517,7 +518,7 @@ def test_X1_effect(self):
             "X1", 1, 0, {"X2"}, "Y", effect_modifiers={"x2": 0}, formula="Y ~ X1 + X2 + (X1 * X2)", df=self.df
         )
         test_results = lr_model.estimate_ate()
-        ate = test_results[0]
+        ate = test_results[0][0]
         self.assertAlmostEqual(ate, 2.0)
 
     def test_categorical_confidence_intervals(self):