From 77649b5440685c23a0d28d79b74d4ba1a1b9f532 Mon Sep 17 00:00:00 2001 From: Jordan-Day-ONS <57715292+Jday7879@users.noreply.github.com> Date: Fri, 10 Jan 2025 07:35:27 +0000 Subject: [PATCH] 704 type marker back data (#143) * Creating backdata unit test * Testing updating return flag when imputation_flag exists - Updating unit test data - intial fir flag now overwritten with bir ... * frozen change pre-commit fix * update imputation flags from back data * updated filepath for input data * refactored, fillna using forward or backward is depreciated, now using ffill or bfill * replaced fillna(fillmethod) with either ffill or bfill * removed final fill method to either ffill or bfill * Dealing with pandas infer dtype warning * Adding extra unit test cases * question columns are correct for back data cases * Corrected columns in unit test data * update filtering to use defined bool * Docstrings and tidying up old comments --------- Co-authored-by: Wil Roberts --- .../imputation/apply_imputation_link.py | 24 ++- mbs_results/imputation/imputation_flags.py | 109 +++++++++---- mbs_results/imputation/ratio_of_means.py | 150 +++++++++++++++++- mbs_results/staging/data_cleaning.py | 23 +-- .../back_data_testing/C_FIC_FIC_input.csv | 21 +++ .../back_data_testing/C_FIC_FIC_output.csv | 21 +++ .../back_data_testing/FIC_FIC_FIC_input.csv | 13 ++ .../back_data_testing/FIC_FIC_FIC_output.csv | 13 ++ .../FIMC_FIMC_FIMC_input.csv | 13 ++ .../FIMC_FIMC_FIMC_output.csv | 13 ++ .../back_data_testing/FIR_FIR_FIR_input.csv | 13 ++ .../back_data_testing/FIR_FIR_FIR_output.csv | 13 ++ .../back_data_testing/MC_FIMC_FIMC_input.csv | 13 ++ .../back_data_testing/MC_FIMC_FIMC_output.csv | 13 ++ .../back_data_testing/R_FIR_FIR_input.csv | 13 ++ .../back_data_testing/R_FIR_FIR_output.csv | 13 ++ .../data_cleaning/test_run_live_or_frozen.csv | 8 - .../test_run_live_or_frozen_frozen_output.csv | 4 + .../test_run_live_or_frozen_input.csv | 4 + tests/imputation/test_imputation_flags.py | 5 + tests/imputation/test_ratio_of_means.py | 23 ++- .../test_ratio_of_means_back_data.py | 110 +++++++++++++ tests/staging/test_data_cleaning.py | 20 +-- 23 files changed, 573 insertions(+), 79 deletions(-) create mode 100644 tests/data/imputation/back_data_testing/C_FIC_FIC_input.csv create mode 100644 tests/data/imputation/back_data_testing/C_FIC_FIC_output.csv create mode 100644 tests/data/imputation/back_data_testing/FIC_FIC_FIC_input.csv create mode 100644 tests/data/imputation/back_data_testing/FIC_FIC_FIC_output.csv create mode 100644 tests/data/imputation/back_data_testing/FIMC_FIMC_FIMC_input.csv create mode 100644 tests/data/imputation/back_data_testing/FIMC_FIMC_FIMC_output.csv create mode 100644 tests/data/imputation/back_data_testing/FIR_FIR_FIR_input.csv create mode 100644 tests/data/imputation/back_data_testing/FIR_FIR_FIR_output.csv create mode 100644 tests/data/imputation/back_data_testing/MC_FIMC_FIMC_input.csv create mode 100644 tests/data/imputation/back_data_testing/MC_FIMC_FIMC_output.csv create mode 100644 tests/data/imputation/back_data_testing/R_FIR_FIR_input.csv create mode 100644 tests/data/imputation/back_data_testing/R_FIR_FIR_output.csv delete mode 100644 tests/data/staging/data_cleaning/test_run_live_or_frozen.csv create mode 100755 tests/data/staging/data_cleaning/test_run_live_or_frozen_frozen_output.csv create mode 100644 tests/data/staging/data_cleaning/test_run_live_or_frozen_input.csv create mode 100644 tests/imputation/test_ratio_of_means_back_data.py diff --git 
a/mbs_results/imputation/apply_imputation_link.py b/mbs_results/imputation/apply_imputation_link.py index 78517e7c..7bb64e26 100644 --- a/mbs_results/imputation/apply_imputation_link.py +++ b/mbs_results/imputation/apply_imputation_link.py @@ -56,6 +56,14 @@ def create_and_merge_imputation_values( # constructed has to come first to use the result for forward # impute from constructed imputation_config = { + # "backdata": { + # "intermediate_column": "backdata", + # "marker": "backdata", + # # doesn't actually apply a fill so can be forward or back + # "fill_column": target, + # "fill_method": "ffill", + # "link_column": cumulative_forward_link, + # }, "c": { "intermediate_column": "constructed", "marker": "c", @@ -145,9 +153,19 @@ def create_impute(df, group, imputation_spec): fill_column = imputation_spec["fill_column"] fill_method = imputation_spec["fill_method"] link_column = imputation_spec["link_column"] - df[column_name] = ( - df.groupby(group)[fill_column].fillna(method=fill_method) * df[link_column] - ) + imputation_spec["marker"] + + if fill_method == "ffill": + df[column_name] = df.groupby(group)[fill_column].ffill() * df[link_column] + elif fill_method == "bfill": + df[column_name] = df.groupby(group)[fill_column].bfill() * df[link_column] + + if "hold_period_0_values" in df.columns: + df.loc[df["hold_period_0_values"].notnull(), column_name] = df.loc[ + df["hold_period_0_values"].notnull(), "hold_period_0_values" + ] + df.drop(columns="hold_period_0_values", inplace=True) + return df diff --git a/mbs_results/imputation/imputation_flags.py b/mbs_results/imputation/imputation_flags.py index 6d30fa47..b6ed63d8 100644 --- a/mbs_results/imputation/imputation_flags.py +++ b/mbs_results/imputation/imputation_flags.py @@ -9,6 +9,7 @@ def generate_imputation_marker( reference: str, strata: str, auxiliary: str, + back_data_period: str, time_difference=1, **kwargs, ) -> pd.DataFrame: @@ -35,7 +36,10 @@ def generate_imputation_marker( Column name containing strata information (sic). auxiliary : str Column name containing auxiliary data. - time_difference: int + back_data_period : pd.Timestamp + Time period used as the back data period. This periods data + should not be changed + time_difference: int, Optional lookup distance for matched pairs kwargs : mapping, optional A dictionary of keyword arguments passed into func. @@ -47,6 +51,7 @@ def generate_imputation_marker( i.e. the type of imputation method that should be used to fill missing returns. 
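The pandas change behind several hunks in this patch (create_impute above, and imputation_overlaps_mc / flag_rolling_impute below) is the deprecation of the `method` argument to `fillna`; grouped fills are now written as explicit `ffill()` / `bfill()` calls. A minimal sketch of the equivalence, using toy column names rather than the pipeline's:

```python
import pandas as pd

# Toy frame: "ref" and "target" are illustrative names only.
df = pd.DataFrame(
    {
        "ref": [1, 1, 1, 2, 2],
        "target": [10.0, None, None, 5.0, None],
    }
)

# Deprecated in recent pandas releases (emits a FutureWarning):
# df.groupby("ref")["target"].fillna(method="ffill")

# Explicit equivalents used throughout this patch:
forward_filled = df.groupby("ref")["target"].ffill()
backward_filled = df.groupby("ref")["target"].bfill()

print(forward_filled.tolist())   # [10.0, 10.0, 10.0, 5.0, 5.0]
print(backward_filled.tolist())  # [10.0, nan, nan, 5.0, nan]
```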
""" + if f"{target}_man" in df.columns: flags = ["r", "mc", "fir", "bir", "fimc", "fic", "c"] # Check order from Specs @@ -54,10 +59,18 @@ def generate_imputation_marker( flags = ["r", "fir", "bir", "fic", "c"] create_imputation_logical_columns( - df, target, period, reference, strata, auxiliary, time_difference + df, + target, + period, + reference, + strata, + auxiliary, + back_data_period, + time_difference, ) select_cols = [f"{i}_flag_{target}" for i in flags] + df.to_csv("temp.csv") first_condition_met = [np.where(i)[0][0] for i in df[select_cols].values] df[f"imputation_flags_{target}"] = [flags[i] for i in first_condition_met] df.drop(columns=select_cols, inplace=True) @@ -72,6 +85,7 @@ def create_imputation_logical_columns( reference: str, strata: str, auxiliary: str, + back_data_period: str, time_difference: int = 1, ): """ @@ -110,31 +124,57 @@ def create_imputation_logical_columns( df.sort_values([reference, strata, period], inplace=True) - df[f"r_flag_{target}"] = df[target].notna() + if f"imputation_flags_{target}" in df.columns: + # Case where back data is present + backdata_r_mask = df[f"backdata_flags_{target}"] == "r" + backdata_fir_mask = df[f"backdata_flags_{target}"] == "fir" + backdata_fimc_mask = df[f"backdata_flags_{target}"] == "fimc" + backdata_c_mask = df[f"backdata_flags_{target}"] == "c" + backdata_fic_mask = df[f"backdata_flags_{target}"] == "fic" + + else: + df["is_backdata"] = df[reference] != df[reference] + backdata_r_mask = df[reference] != df[reference] + backdata_fir_mask = df[reference] != df[reference] + backdata_fimc_mask = df[reference] != df[reference] + backdata_c_mask = df[reference] != df[reference] + backdata_fic_mask = df[reference] != df[reference] + print(backdata_r_mask) + + # if target na but not back data period OR if backdata flag is 'r' + df[f"r_flag_{target}"] = (df[target].notna() & ~df["is_backdata"]) | backdata_r_mask if f"{target}_man" in df.columns: df[f"mc_flag_{target}"] = df[f"{target}_man"].notna() - df[f"fir_flag_{target}"] = flag_rolling_impute( - df, time_difference, strata, reference, target, period - ) + df[f"fir_flag_{target}"] = ( + flag_rolling_impute(df, time_difference, strata, reference, target, period) + & ~df["is_backdata"] + ) | backdata_fir_mask - df[f"bir_flag_{target}"] = flag_rolling_impute( - df, -time_difference, strata, reference, target, period - ) + df[f"bir_flag_{target}"] = ( + flag_rolling_impute(df, -time_difference, strata, reference, target, period) + & ~df["is_backdata"] + ) | backdata_r_mask if f"{target}_man" in df.columns: - df[f"fimc_flag_{target}"] = flag_rolling_impute( - df, time_difference, strata, reference, f"{target}_man", period + df[f"fimc_flag_{target}"] = ( + flag_rolling_impute( + df, time_difference, strata, reference, f"{target}_man", period + ) + | backdata_fimc_mask ) df = imputation_overlaps_mc(df, target, reference, strata) - construction_conditions = df[target].isna() & df[auxiliary].notna() + construction_conditions = ( + df[target].isna() & df[auxiliary].notna() & ~df["is_backdata"] + ) | backdata_c_mask df[f"c_flag_{target}"] = np.where(construction_conditions, True, False) - df[f"fic_flag_{target}"] = flag_rolling_impute( - df, time_difference, strata, reference, auxiliary, period + df[f"fic_flag_{target}"] = ( + flag_rolling_impute(df, time_difference, strata, reference, auxiliary, period) + | backdata_fic_mask ) return df @@ -173,11 +213,15 @@ def imputation_overlaps_mc(df, target, reference, strata): df[column] = np.where( df[imputation_marker_column] & 
df[f"mc_flag_{target}"], False, None ) - df[column] = ( - df.groupby([strata, reference])[column].fillna( - method=direction_single_string + "fill" - ) - ).fillna(True) + if direction_single_string == "b": + df[column] = ( + df.groupby([strata, reference])[column].bfill().astype(bool) + ).fillna(True) + elif direction_single_string == "f": + df[column] = ( + df.groupby([strata, reference])[column].ffill().astype(bool) + ).fillna(True) + df[imputation_marker_column] = df[imputation_marker_column] & df[column] df.drop( columns=[column], @@ -220,23 +264,28 @@ def flag_rolling_impute( pd.Series """ - if time_difference < 0: - fillmethod = "bfill" - elif time_difference > 0: - fillmethod = "ffill" - df["fill_group"] = ( (df[period] - pd.DateOffset(months=1) != df.shift(1)[period]) | (df[strata].diff(1) != 0) | (df[reference].diff(1) != 0) ).cumsum() - boolean_column = ( - df.groupby(["fill_group"])[target] - .fillna(method=fillmethod) - .notnull() - .mul(df["fill_group"] == df.shift(time_difference)["fill_group"]) - ) + if time_difference < 0: + boolean_column = ( + df.groupby(["fill_group"])[target] + .bfill() + .notnull() + .mul(df["fill_group"] == df.shift(time_difference)["fill_group"]) + ) + + elif time_difference > 0: + boolean_column = ( + df.groupby(["fill_group"])[target] + .ffill() + .notnull() + .mul(df["fill_group"] == df.shift(time_difference)["fill_group"]) + ) + df.drop(columns="fill_group", inplace=True) return boolean_column diff --git a/mbs_results/imputation/ratio_of_means.py b/mbs_results/imputation/ratio_of_means.py index eeee9e8f..66c61f49 100644 --- a/mbs_results/imputation/ratio_of_means.py +++ b/mbs_results/imputation/ratio_of_means.py @@ -219,6 +219,121 @@ def wrap_get_cumulative_links( return df +def process_backdata( + df: pd.DataFrame, target: str, period: str, back_data_period: str +) -> pd.DataFrame: + """ + Function to process the back data. Removes some values from the target column so + that the correct imputation links are calculated. + + Parameters + ---------- + df : pd.DataFrame + original dataframe + target : str + target column name + period : str + period column name + back_data_period : str + back data period value + + Returns + ------- + pd.DataFrame + dataframe with backdata processed and backdata flags copied to separate columns + """ + # Bool for if period is back data + df["is_backdata"] = df[period] == pd.to_datetime(back_data_period, format="%Y%m") + # Copying backdata to separate column + df.loc[df["is_backdata"], f"backdata_{target}"] = df.loc[df["is_backdata"], target] + # Copying flags to separate column + df[f"backdata_flags_{target}"] = df[f"imputation_flags_{target}"].str.lower() + + # moving mc data into manual construction column for MC imputation + df.loc[df[f"backdata_flags_{target}"] == "mc", f"{target}_man"] = df.loc[ + df[f"backdata_flags_{target}"] == "mc", target + ] + df.loc[df[f"backdata_flags_{target}"] == "fimc", f"{target}_man"] = df.loc[ + df[f"backdata_flags_{target}"] == "fimc", target + ] + + # removing all non-return back data from the target column + df.loc[ + (~df[f"backdata_flags_{target}"].isin(["r"])) + & (df[f"backdata_flags_{target}"].notna()), + target, + ] = None + + return df + +
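To make the new back-data handling concrete, here is a small self-contained sketch of what `process_backdata` does for one target column, together with how the back data period is derived (mirroring `calculate_back_data_period` further down). The column names (`date`, `question`, `imputation_flags_question`) follow the new test fixtures, and the `mc`/`fimc` hand-off into the `*_man` column is omitted for brevity:

```python
import pandas as pd

# Back data period: step `revision_period` months back from `current_period`,
# e.g. 202003 with a revision period of 2 gives 202001 (as in the new tests).
current_period, revision_period = "202003", 2
back_data_period = (
    pd.to_datetime(current_period, format="%Y%m")
    - pd.DateOffset(months=revision_period)
).strftime("%Y%m")

df = pd.DataFrame(
    {
        "date": pd.to_datetime(["202001", "202002", "202003"], format="%Y%m"),
        "question": [9836.0, None, None],
        "imputation_flags_question": ["FIR", None, None],
    }
)

# Mark the frozen back data period and keep copies of its values and markers.
df["is_backdata"] = df["date"] == pd.to_datetime(back_data_period, format="%Y%m")
df.loc[df["is_backdata"], "backdata_question"] = df.loc[df["is_backdata"], "question"]
df["backdata_flags_question"] = df["imputation_flags_question"].str.lower()

# Blank flagged back data that is not a genuine return, so the links are built
# from returned values only; reapply_backdata restores the copies afterwards.
not_return = df["backdata_flags_question"].notna() & (
    df["backdata_flags_question"] != "r"
)
df.loc[not_return, "question"] = None

print(df[["date", "question", "backdata_question", "backdata_flags_question"]])
```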
+def reapply_backdata( + df: pd.DataFrame, target: str, dropping: bool = False +) -> pd.DataFrame: + """ + Reapply backdata flags and values to ensure no changes are made to back data. + Will not do anything if back data is not present in the dataframe. + dropping is an optional argument which will drop the copied backdata column. + + Parameters + ---------- + df : pd.DataFrame + original dataframe + target : str + target column name + dropping : bool, optional + if True, the temporary column storing back data will be removed, by default False + + Returns + ------- + pd.DataFrame + original dataframe with back data re-applied. + """ + if f"backdata_flags_{target}" in df.columns: + + is_backdata_not_return = (df[f"backdata_flags_{target}"] != "r") & ( + df["is_backdata"] + ) + df.loc[is_backdata_not_return, target] = df.loc[ + is_backdata_not_return, f"backdata_{target}" + ] + df.loc[is_backdata_not_return, f"imputation_flags_{target}"] = df.loc[ + is_backdata_not_return, f"backdata_flags_{target}" + ] + + if dropping: + df.drop(columns=["is_backdata"], inplace=True) + + return df + + +def replace_fir_backdata(df: pd.DataFrame, target: str) -> pd.DataFrame: + """ + Replaces the target column with back data. + This is removed before calculating the forwards and backwards links to + ensure the correct values are used. + + Parameters + ---------- + df : pd.DataFrame + original dataframe + target : str + target column name + + Returns + ------- + pd.DataFrame + original dataframe with imputed data copied over into the target column. + + """ + if f"backdata_flags_{target}" in df.columns: + df.loc[(df[f"backdata_flags_{target}"].isin(["fir"])), target] = df.loc[ + (df[f"backdata_flags_{target}"].isin(["fir"])), f"backdata_{target}" + ] + + return df + + + def ratio_of_means( df: pd.DataFrame, target: str, @@ -226,6 +341,8 @@ def ratio_of_means( reference: str, strata: str, auxiliary: str, + current_period: str, + revision_period: str, filters: pd.DataFrame = None, manual_constructions: pd.DataFrame = None, imputation_links: Dict[str, str] = {}, @@ -276,6 +393,9 @@ def ratio_of_means( # These arguments are used from the majority of functions # TODO: Consider more elegant solution, or define function arguments explicitly + back_data_period = calculate_back_data_period(current_period, revision_period) + if f"imputation_flags_{target}" in df.columns: + df = process_backdata(df, target, period, back_data_period) default_columns = { "target": target, @@ -283,6 +403,7 @@ "reference": reference, "strata": strata, "auxiliary": auxiliary, + "back_data_period": back_data_period, } if filters is not None: @@ -319,13 +440,12 @@ imputation_types = ("c", "fir", "bir", "fic") df = ( - df # .pipe( - # create_impute_flags, - # **default_columns, - # predictive_auxiliary="f_predictive_auxiliary" - # ) + df + # Pass backdata period to calculate imputation link + .pipe(replace_fir_backdata, target=target) .pipe(generate_imputation_marker, **default_columns) .pipe(wrap_get_cumulative_links, **default_columns) + .pipe(reapply_backdata, target=target) .pipe( create_and_merge_imputation_values, **default_columns, @@ -336,6 +456,7 @@ construction_link="construction_link", imputation_types=imputation_types, ) + .pipe(reapply_backdata, target=target, dropping=True) ) # TODO: Reset index needed because of sorting, perhaps reset index @@ -381,3 +502,22 @@ # TODO: Missing extra columns, default values and if filter was applied, all bool return df + + +def calculate_back_data_period(current_period, revision_period) -> str: + current_period = pd.to_datetime(current_period, format="%Y%m") + back_data_period = ( + (current_period -
pd.DateOffset(months=revision_period)).date().strftime("%Y%m") + ) + return back_data_period + + +if __name__ == "__main__": + from mbs_results.utilities.inputs import load_config + + config = load_config() + bdp = calculate_back_data_period( + current_period=config["current_period"], + revision_period=config["revision_period"], + ) + print(config["current_period"], bdp) diff --git a/mbs_results/staging/data_cleaning.py b/mbs_results/staging/data_cleaning.py index 99ad0794..b9ac7bf2 100644 --- a/mbs_results/staging/data_cleaning.py +++ b/mbs_results/staging/data_cleaning.py @@ -79,7 +79,7 @@ def enforce_datatypes( keep_columns: list, master_column_type_dict: dict, temporarily_remove_cols: list, - **config + **config, ): """ function to change datatypes of columns based on config file @@ -187,7 +187,7 @@ def join_manual_constructions( reference: str, period: str, question_no: str = "question_no", - **config + **config, ): """ joins manual construction data from onto main dataframe @@ -278,9 +278,9 @@ def is_same_dtype(df: pd.DataFrame, df2: pd.DataFrame, col_name: str) -> bool: def run_live_or_frozen( df: pd.DataFrame, target: str or list[str], - error_marker: str, + status: str, state: str = "live", - error_values: List[str] = ["E", "W"], + error_values: List[str] = ["Check needed"], ) -> pd.DataFrame: """ For frozen, therefore target values are converted to null, hence responses @@ -292,15 +292,15 @@ def run_live_or_frozen( Original dataframe. target : str or list[str] Column(s) to treat as non-response. - error_marker : str - Column name with error values. + status : str + Column containing error status. state : str, optional Function config parameter. The default is "live". "live" state won't do - anyting, "frozen" will convert to null the error_values within error_marker + anyting, "frozen" will convert to null the error_values within status error_values : list[str], optional - Values to ignore. The default is ['E', 'W']. + Values to ignore. The default is ['Check needed']. 
Mapping: - E -> 'Check needed' : '201', + 'Check needed' : '201', ("E" or "W" for CSW) 'Clear' : '210', 'Clear - overridden' : '211' @@ -320,8 +320,9 @@ def run_live_or_frozen( ) if state == "frozen": - df["frozen_error"] = df.apply( - lambda x: x[target] if x[error_marker] in (error_values) else None, axis=1 + df[f"live_{target}"] = df[target].copy() + df[target] = df.apply( + lambda x: x[target] if x[status] not in (error_values) else None, axis=1 ) return df diff --git a/tests/data/imputation/back_data_testing/C_FIC_FIC_input.csv b/tests/data/imputation/back_data_testing/C_FIC_FIC_input.csv new file mode 100644 index 00000000..48c0d813 --- /dev/null +++ b/tests/data/imputation/back_data_testing/C_FIC_FIC_input.csv @@ -0,0 +1,21 @@ +identifier,date,group,question,other,imputation_flags_question +120001,202001,100,5240,50,R +120001,202002,100,2490,50 +120001,202003,100,3382,50 +120001,202004,100,4475,50 +120001,202005,100,1316,50 +120002,202001,100,7410,78,R +120002,202002,100,3602,78 +120002,202003,100,4972,78 +120002,202004,100,8838,78 +120002,202005,100,1535,78 +120003,202001,100,4530,94,R +120003,202002,100,7451,94 +120003,202003,100,7586,94 +120003,202004,100,283,94 +120003,202005,100,4416,94 +120004,202001,100,7738.738739,100,C +120004,202002,100,,100 +120004,202003,100,,100 +120004,202004,100,,100 +120004,202005,100,,100 diff --git a/tests/data/imputation/back_data_testing/C_FIC_FIC_output.csv b/tests/data/imputation/back_data_testing/C_FIC_FIC_output.csv new file mode 100644 index 00000000..f9b34c4e --- /dev/null +++ b/tests/data/imputation/back_data_testing/C_FIC_FIC_output.csv @@ -0,0 +1,21 @@ +identifier,date,group,output,imputation_flags_question,forward,backward,construction,count_forward,count_backward,count_construction,default_forward,default_backward,default_construction +120001,202001,100,5240,R,1,1.268552019,77.38738739,0,3,3,true,false,false +120001,202002,100,2490,R,0.788300349,0.849623589,61.0045045,3,3,3,false,false,false +120001,202003,100,3382,R,1.176991804,1.172403648,71.8018018,3,3,3,false,false,false +120001,202004,100,4475,R,0.852948557,1.870923352,61.24324324,3,3,3,false,false,false +120001,202005,100,1316,R,0.53449544,1,32.73423423,3,0,3,false,true,false +120002,202001,100,7410,R,1,1.268552019,77.38738739,0,3,3,true,false,false +120002,202002,100,3602,R,0.788300349,0.849623589,61.0045045,3,3,3,false,false,false +120002,202003,100,4972,R,1.176991804,1.172403648,71.8018018,3,3,3,false,false,false +120002,202004,100,8838,R,0.852948557,1.870923352,61.24324324,3,3,3,false,false,false +120002,202005,100,1535,R,0.53449544,1,32.73423423,3,0,3,false,true,false +120003,202001,100,4530,R,1,1.268552019,77.38738739,0,3,3,true,false,false +120003,202002,100,7451,R,0.788300349,0.849623589,61.0045045,3,3,3,false,false,false +120003,202003,100,7586,R,1.176991804,1.172403648,71.8018018,3,3,3,false,false,false +120003,202004,100,283,R,0.852948557,1.870923352,61.24324324,3,3,3,false,false,false +120003,202005,100,4416,R,0.53449544,1,32.73423423,3,0,3,false,true,false +120004,202001,100,7738.738739,C,1,1.268552019,77.38738739,0,3,3,true,false,false +120004,202002,100,6100.450451,FIC,0.788300349,0.849623589,61.0045045,3,3,3,false,false,false +120004,202003,100,7180.180181,FIC,1.176991804,1.172403648,71.8018018,3,3,3,false,false,false +120004,202004,100,6124.324325,FIC,0.852948557,1.870923352,61.24324324,3,3,3,false,false,false +120004,202005,100,3273.423424,FIC,0.53449544,1,32.73423423,3,0,3,false,true,false diff --git 
a/tests/data/imputation/back_data_testing/FIC_FIC_FIC_input.csv b/tests/data/imputation/back_data_testing/FIC_FIC_FIC_input.csv new file mode 100644 index 00000000..2e147223 --- /dev/null +++ b/tests/data/imputation/back_data_testing/FIC_FIC_FIC_input.csv @@ -0,0 +1,13 @@ +identifier,date,group,question,other,imputation_flags_question +40001,202001,100,4783,35,R +40001,202002,100,7902,35 +40001,202003,100,4911,35 +40002,202001,100,442,63,R +40002,202002,100,3136,63 +40002,202003,100,2115,63 +40003,202001,100,8121,16,R +40003,202002,100,2151,16 +40003,202003,100,1377,16 +40004,202001,100,9836,78,FIC +40004,202002,100,,78 +40004,202003,100,,78 diff --git a/tests/data/imputation/back_data_testing/FIC_FIC_FIC_output.csv b/tests/data/imputation/back_data_testing/FIC_FIC_FIC_output.csv new file mode 100644 index 00000000..2a79af55 --- /dev/null +++ b/tests/data/imputation/back_data_testing/FIC_FIC_FIC_output.csv @@ -0,0 +1,13 @@ +identifier,date,group,output,imputation_flags_question,forward,backward,construction,count_forward,count_backward,count_construction,default_forward,default_backward,default_construction +40001,202001,100,4783,R,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40001,202002,100,7902,R,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40001,202003,100,4911,R,0.637121844,1,73.71052632,3,0,3,false,true,false +40002,202001,100,442,R,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40002,202002,100,3136,R,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40002,202003,100,2115,R,0.637121844,1,73.71052632,3,0,3,false,true,false +40003,202001,100,8121,R,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40003,202002,100,2151,R,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40003,202003,100,1377,R,0.637121844,1,73.71052632,3,0,3,false,true,false +40004,202001,100,9836,FIC,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40004,202002,100,9720.291024,FIC,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40004,202003,100,6193.009741,FIC,0.637121844,1,73.71052632,3,0,3,false,true,false diff --git a/tests/data/imputation/back_data_testing/FIMC_FIMC_FIMC_input.csv b/tests/data/imputation/back_data_testing/FIMC_FIMC_FIMC_input.csv new file mode 100644 index 00000000..87f97339 --- /dev/null +++ b/tests/data/imputation/back_data_testing/FIMC_FIMC_FIMC_input.csv @@ -0,0 +1,13 @@ +identifier,date,group,question,other,imputation_flags_question +40001,202001,100,4783,35,R +40001,202002,100,7902,35 +40001,202003,100,4911,35 +40002,202001,100,442,63,R +40002,202002,100,3136,63 +40002,202003,100,2115,63 +40003,202001,100,8121,16,R +40003,202002,100,2151,16 +40003,202003,100,1377,16 +40004,202001,100,9836,78,FIMC +40004,202002,100,,78 +40004,202003,100,,78 diff --git a/tests/data/imputation/back_data_testing/FIMC_FIMC_FIMC_output.csv b/tests/data/imputation/back_data_testing/FIMC_FIMC_FIMC_output.csv new file mode 100644 index 00000000..7c43f93b --- /dev/null +++ b/tests/data/imputation/back_data_testing/FIMC_FIMC_FIMC_output.csv @@ -0,0 +1,13 @@ +identifier,date,group,output,imputation_flags_question,forward,backward,construction,count_forward,count_backward,count_construction,default_forward,default_backward,default_construction +40001,202001,100,4783,R,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40001,202002,100,7902,R,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40001,202003,100,4911,R,0.637121844,1,73.71052632,3,0,3,false,true,false 
+40002,202001,100,442,R,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40002,202002,100,3136,R,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40002,202003,100,2115,R,0.637121844,1,73.71052632,3,0,3,false,true,false +40003,202001,100,8121,R,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40003,202002,100,2151,R,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40003,202003,100,1377,R,0.637121844,1,73.71052632,3,0,3,false,true,false +40004,202001,100,9836,FIMC,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40004,202002,100,9720.291024,FIMC,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40004,202003,100,6193.009741,FIMC,0.637121844,1,73.71052632,3,0,3,false,true,false diff --git a/tests/data/imputation/back_data_testing/FIR_FIR_FIR_input.csv b/tests/data/imputation/back_data_testing/FIR_FIR_FIR_input.csv new file mode 100644 index 00000000..decf3b8f --- /dev/null +++ b/tests/data/imputation/back_data_testing/FIR_FIR_FIR_input.csv @@ -0,0 +1,13 @@ +identifier,date,group,question,other,imputation_flags_question +40001,202001,100,4783,35,R +40001,202002,100,7902,35 +40001,202003,100,4911,35 +40002,202001,100,442,63,R +40002,202002,100,3136,63 +40002,202003,100,2115,63 +40003,202001,100,8121,16,R +40003,202002,100,2151,16 +40003,202003,100,1377,16 +40004,202001,100,9836,78,FIR +40004,202002,100,,78 +40004,202003,100,,78 diff --git a/tests/data/imputation/back_data_testing/FIR_FIR_FIR_output.csv b/tests/data/imputation/back_data_testing/FIR_FIR_FIR_output.csv new file mode 100644 index 00000000..f8fdf5a2 --- /dev/null +++ b/tests/data/imputation/back_data_testing/FIR_FIR_FIR_output.csv @@ -0,0 +1,13 @@ +identifier,date,group,output,imputation_flags_question,forward,backward,construction,count_forward,count_backward,count_construction,default_forward,default_backward,default_construction +40001,202001,100,4783,R,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40001,202002,100,7902,R,0.988236176,1.569558491,115.692982,3,3,3,FALSE,FALSE,FALSE +40001,202003,100,4911,R,0.637121844,1,73.71052632,3,0,3,FALSE,TRUE,FALSE +40002,202001,100,442,R,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40002,202002,100,3136,R,0.988236176,1.569558491,115.692982,3,3,3,FALSE,FALSE,FALSE +40002,202003,100,2115,R,0.637121844,1,73.71052632,3,0,3,FALSE,TRUE,FALSE +40003,202001,100,8121,R,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40003,202002,100,2151,R,0.988236176,1.569558491,115.692982,3,3,3,FALSE,FALSE,FALSE +40003,202003,100,1377,R,0.637121844,1,73.71052632,3,0,3,FALSE,TRUE,FALSE +40004,202001,100,9836,FIR,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40004,202002,100,9720.291024,FIR,0.988236176,1.569558491,115.692982,3,3,3,FALSE,FALSE,FALSE +40004,202003,100,6193.009741,FIR,0.637121844,1,73.71052632,3,0,3,FALSE,TRUE,FALSE diff --git a/tests/data/imputation/back_data_testing/MC_FIMC_FIMC_input.csv b/tests/data/imputation/back_data_testing/MC_FIMC_FIMC_input.csv new file mode 100644 index 00000000..a9a9bb5e --- /dev/null +++ b/tests/data/imputation/back_data_testing/MC_FIMC_FIMC_input.csv @@ -0,0 +1,13 @@ +identifier,date,group,question,other,imputation_flags_question +40001,202001,100,4783,35,R +40001,202002,100,7902,35 +40001,202003,100,4911,35 +40002,202001,100,442,63,R +40002,202002,100,3136,63 +40002,202003,100,2115,63 +40003,202001,100,8121,16,R +40003,202002,100,2151,16 +40003,202003,100,1377,16 +40004,202001,100,9836,78,MC +40004,202002,100,,78 +40004,202003,100,,78 diff --git 
a/tests/data/imputation/back_data_testing/MC_FIMC_FIMC_output.csv b/tests/data/imputation/back_data_testing/MC_FIMC_FIMC_output.csv new file mode 100644 index 00000000..7e03188e --- /dev/null +++ b/tests/data/imputation/back_data_testing/MC_FIMC_FIMC_output.csv @@ -0,0 +1,13 @@ +identifier,date,group,output,imputation_flags_question,forward,backward,construction,count_forward,count_backward,count_construction,default_forward,default_backward,default_construction +40001,202001,100,4783,R,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40001,202002,100,7902,R,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40001,202003,100,4911,R,0.637121844,1,73.71052632,3,0,3,false,true,false +40002,202001,100,442,R,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40002,202002,100,3136,R,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40002,202003,100,2115,R,0.637121844,1,73.71052632,3,0,3,false,true,false +40003,202001,100,8121,R,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40003,202002,100,2151,R,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40003,202003,100,1377,R,0.637121844,1,73.71052632,3,0,3,false,true,false +40004,202001,100,9836,MC,1,1.011903859,117.070175438596,0,3,3,TRUE,FALSE,FALSE +40004,202002,100,9720.291024,FIMC,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40004,202003,100,6193.009741,FIMC,0.637121844,1,73.71052632,3,0,3,false,true,false diff --git a/tests/data/imputation/back_data_testing/R_FIR_FIR_input.csv b/tests/data/imputation/back_data_testing/R_FIR_FIR_input.csv new file mode 100644 index 00000000..c0a45b1a --- /dev/null +++ b/tests/data/imputation/back_data_testing/R_FIR_FIR_input.csv @@ -0,0 +1,13 @@ +identifier,date,group,question,other,imputation_flags_question +40001,202001,100,4783,35,R +40001,202002,100,7902,35 +40001,202003,100,4911,35 +40002,202001,100,442,63,R +40002,202002,100,3136,63 +40002,202003,100,2115,63 +40003,202001,100,8121,16,R +40003,202002,100,2151,16 +40003,202003,100,1377,16 +40004,202001,100,9836,78,R +40004,202002,100,,78 +40004,202003,100,,78 diff --git a/tests/data/imputation/back_data_testing/R_FIR_FIR_output.csv b/tests/data/imputation/back_data_testing/R_FIR_FIR_output.csv new file mode 100644 index 00000000..a64b050b --- /dev/null +++ b/tests/data/imputation/back_data_testing/R_FIR_FIR_output.csv @@ -0,0 +1,13 @@ +identifier,date,group,output,imputation_flags_question,forward,backward,construction,count_forward,count_backward,count_construction,default_forward,default_backward,default_construction +40001,202001,100,4783,R,1,1.011903859,120.7395833,0,3,4,TRUE,FALSE,FALSE +40001,202002,100,7902,R,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40001,202003,100,4911,R,0.637121844,1,73.71052632,3,0,3,false,true,false +40002,202001,100,442,R,1,1.011903859,120.7395833,0,3,4,TRUE,FALSE,FALSE +40002,202002,100,3136,R,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40002,202003,100,2115,R,0.637121844,1,73.71052632,3,0,3,false,true,false +40003,202001,100,8121,R,1,1.011903859,120.7395833,0,3,4,TRUE,FALSE,FALSE +40003,202002,100,2151,R,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40003,202003,100,1377,R,0.637121844,1,73.71052632,3,0,3,false,true,false +40004,202001,100,9836,R,1,1.011903859,120.7395833,0,3,4,TRUE,FALSE,FALSE +40004,202002,100,9720.291024,FIR,0.988236176,1.569558491,115.692982,3,3,3,false,false,false +40004,202003,100,6193.009741,FIR,0.637121844,1,73.71052632,3,0,3,false,true,false diff --git 
a/tests/data/staging/data_cleaning/test_run_live_or_frozen.csv b/tests/data/staging/data_cleaning/test_run_live_or_frozen.csv deleted file mode 100644 index 02471894..00000000 --- a/tests/data/staging/data_cleaning/test_run_live_or_frozen.csv +++ /dev/null @@ -1,8 +0,0 @@ -target,error,live,frozen,frozen_error -2,C,2,2, -7,E,7,,7 -1,O,1,1, -6,W,6,,6 -3,C,3,3, -5,E,5,,5 -4,W,4,,4 diff --git a/tests/data/staging/data_cleaning/test_run_live_or_frozen_frozen_output.csv b/tests/data/staging/data_cleaning/test_run_live_or_frozen_frozen_output.csv new file mode 100755 index 00000000..4d5ca1f0 --- /dev/null +++ b/tests/data/staging/data_cleaning/test_run_live_or_frozen_frozen_output.csv @@ -0,0 +1,4 @@ +target,status,live_target +2,Clear,2 +,Check needed,7 +1,Clear - overridden,1 diff --git a/tests/data/staging/data_cleaning/test_run_live_or_frozen_input.csv b/tests/data/staging/data_cleaning/test_run_live_or_frozen_input.csv new file mode 100644 index 00000000..bb5411cf --- /dev/null +++ b/tests/data/staging/data_cleaning/test_run_live_or_frozen_input.csv @@ -0,0 +1,4 @@ +target,status +2,Clear +7,Check needed +1,Clear - overridden diff --git a/tests/imputation/test_imputation_flags.py b/tests/imputation/test_imputation_flags.py index cd111b1d..291ec763 100644 --- a/tests/imputation/test_imputation_flags.py +++ b/tests/imputation/test_imputation_flags.py @@ -44,6 +44,7 @@ def test_imputation_marker(self, imputation_flag_test_data): strata="strata", auxiliary="auxiliary", predictive_auxiliary="f_match_auxiliary", + back_data_period=111, ) df_expected_output.drop( columns=[ @@ -53,6 +54,8 @@ def test_imputation_marker(self, imputation_flag_test_data): ], inplace=True, ) + df_output.drop(columns=["is_backdata"], inplace=True) + assert_frame_equal(df_output, df_expected_output) def test_imputation_marker_manual_construction( @@ -79,6 +82,7 @@ def test_imputation_marker_manual_construction( reference="reference", strata="strata", auxiliary="auxiliary", + back_data_period=111, ) df_expected_output.drop( columns=[ @@ -88,5 +92,6 @@ def test_imputation_marker_manual_construction( ], inplace=True, ) + df_output.drop(columns=["is_backdata"], inplace=True) assert_frame_equal(df_output, df_expected_output) diff --git a/tests/imputation/test_ratio_of_means.py b/tests/imputation/test_ratio_of_means.py index ddc7e395..70d70cc3 100644 --- a/tests/imputation/test_ratio_of_means.py +++ b/tests/imputation/test_ratio_of_means.py @@ -57,9 +57,7 @@ ] -pytestmark = pytest.mark.parametrize("base_file_name", scenarios) - - +@pytest.mark.parametrize("base_file_name", scenarios) class TestRatioOfMeans: def test_ratio_of_means(self, base_file_name): @@ -91,6 +89,8 @@ def test_ratio_of_means(self, base_file_name): "backward": "b_link_question", "construction": "construction_link", }, + current_period=202001, + revision_period=10, ) else: actual_output = ratio_of_means( @@ -101,6 +101,8 @@ def test_ratio_of_means(self, base_file_name): strata="group", auxiliary="other", filters=filter_df, + current_period=202001, + revision_period=10, ) actual_output = actual_output.rename( @@ -149,7 +151,6 @@ def test_ratio_of_means(self, base_file_name): errors="ignore", inplace=True, ) - print(expected_output.columns) expected_output = expected_output[actual_output.columns] actual_output = actual_output.sort_values(by=["identifier", "date"]) @@ -166,16 +167,12 @@ def test_ratio_of_means(self, base_file_name): assert_frame_equal(actual_output, expected_output, check_dtype=False) -pytestmark = pytest.mark.parametrize( - "base_file_name", 
scenarios[len(scenarios) - 10 : len(scenarios)] -) - - +@pytest.mark.parametrize("mc_base_file_name", scenarios[-10:]) class TestRatioOfMeansManConstruction: - def test_manual_construction_input(self, base_file_name): - df = pd.read_csv(scenario_path_prefix + base_file_name + "_input.csv") + def test_manual_construction_input(self, mc_base_file_name): + df = pd.read_csv(scenario_path_prefix + mc_base_file_name + "_input.csv") expected_output = pd.read_csv( - scenario_path_prefix + base_file_name + "_output.csv" + scenario_path_prefix + mc_base_file_name + "_output.csv" ) manual_constructions = df.copy()[ @@ -199,6 +196,8 @@ def test_manual_construction_input(self, base_file_name): strata="group", auxiliary="other", manual_constructions=manual_constructions, + current_period=202001, + revision_period=10, ) expected_output["date"] = convert_column_to_datetime(expected_output["date"]) diff --git a/tests/imputation/test_ratio_of_means_back_data.py b/tests/imputation/test_ratio_of_means_back_data.py new file mode 100644 index 00000000..bcc011f9 --- /dev/null +++ b/tests/imputation/test_ratio_of_means_back_data.py @@ -0,0 +1,110 @@ +import pandas as pd +import pytest +from pandas.testing import assert_frame_equal + +from mbs_results.imputation.ratio_of_means import ratio_of_means + +scenario_path_prefix = "tests/data/imputation/back_data_testing/" + +scenarios = [ + "R_FIR_FIR", + "FIR_FIR_FIR", + "C_FIC_FIC", + "FIC_FIC_FIC", + "MC_FIMC_FIMC", + "FIMC_FIMC_FIMC", +] + + +pytestmark = pytest.mark.parametrize("base_file_name", scenarios) + + +class TestRatioOfMeans: + def test_ratio_of_means_back_data(self, base_file_name): + + input_data = pd.read_csv(scenario_path_prefix + base_file_name + "_input.csv") + expected_output = pd.read_csv( + scenario_path_prefix + base_file_name + "_output.csv" + ) + + # Can't use load_format helper, test cases have date instead of period + + input_data["date"] = pd.to_datetime(input_data["date"], format="%Y%m") + expected_output["date"] = pd.to_datetime(expected_output["date"], format="%Y%m") + + actual_output = ratio_of_means( + input_data, + target="question", + period="date", + reference="identifier", + strata="group", + auxiliary="other", + current_period=202003, + revision_period=2, + ) + + actual_output = actual_output.rename( + columns={ + "default_link_b_match_question": "default_backward", + "default_link_f_match_question": "default_forward", + "default_link_flag_construction_matches": "default_construction", + "flag_construction_matches_pair_count": "flag_match_pair_count", + } + ) + + actual_output = actual_output.drop(columns=["other"]) + + # if stays like this we need a function to load expected data + expected_output = expected_output.rename( + columns={ + "output": "question", + "forward": "f_link_question", + "backward": "b_link_question", + "construction": "construction_link", + "count_forward": "f_match_question_pair_count", + "count_backward": "b_match_question_pair_count", + "count_construction": "flag_match_pair_count", + } + ) + + actual_output.drop(columns=["question_man"], errors="ignore", inplace=True) + # Temp work around to drop mc column until its fully integrated + actual_output.drop( + columns=[ + "b_match_filtered_question", + "b_predictive_filtered_question", + "b_link_filtered_question", + "f_predictive_filtered_question", + "f_link_filtered_question", + "filtered_question", + "cumulative_b_link_filtered_question", + "cumulative_f_link_filtered_question", + ], + errors="ignore", + inplace=True, + ) + actual_output.drop( + 
columns=["forward", "backward", "construction"], + errors="ignore", + inplace=True, + ) + actual_output.drop( + columns=["is_backdata", "backdata_flags_question", "backdata_question"], + errors="ignore", + inplace=True, + ) + + expected_output = expected_output[actual_output.columns] + + actual_output = actual_output.sort_values(by=["identifier", "date"]) + expected_output = expected_output.sort_values(by=["identifier", "date"]) + + actual_output = actual_output.reset_index(drop=True) + expected_output = expected_output.reset_index(drop=True) + + expected_output["imputation_flags_question"] = expected_output[ + "imputation_flags_question" + ].str.lower() + expected_output = expected_output.replace({"bi": "bir"}) + + assert_frame_equal(actual_output, expected_output, check_dtype=False) diff --git a/tests/staging/test_data_cleaning.py b/tests/staging/test_data_cleaning.py index 4d530a00..6b8376ae 100644 --- a/tests/staging/test_data_cleaning.py +++ b/tests/staging/test_data_cleaning.py @@ -114,25 +114,25 @@ def test_create_imputation_class(filepath): def test_run_live_or_frozen(filepath): - df = pd.read_csv(filepath / "test_run_live_or_frozen.csv") + df_in = pd.read_csv(filepath / "test_run_live_or_frozen_input.csv") - df_in = df.drop(columns=["frozen", "frozen_error"]) - - live_ouput = run_live_or_frozen(df_in, "target", "error", "live") + expected_frozen_output = pd.read_csv( + filepath / "test_run_live_or_frozen_frozen_output.csv" + ) - frozen_output = run_live_or_frozen(df_in, "target", "error", "frozen") + expected_live_output = df_in.copy() - expected_output_frozen = df.copy() + live_ouput = run_live_or_frozen(df_in, "target", "status", "live") - expected_output_frozen.drop(columns=["frozen"], inplace=True) + frozen_output = run_live_or_frozen(df_in, "target", "status", "frozen") - assert_frame_equal(frozen_output, expected_output_frozen) - assert_frame_equal(live_ouput, df_in) + assert_frame_equal(frozen_output, expected_frozen_output) + assert_frame_equal(live_ouput, expected_live_output) def test_run_live_or_frozen_exception(filepath): - df = pd.read_csv(filepath / "test_run_live_or_frozen.csv") + df = pd.read_csv(filepath / "test_run_live_or_frozen_input.csv") with pytest.raises(ValueError): run_live_or_frozen(df, "target", "error", "love")
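For reference, the behaviour the reworked run_live_or_frozen tests above now pin down: in the frozen state the target is copied to a `live_<target>` column and any value whose status is in `error_values` (default `["Check needed"]`) is set to null, so flagged returns are treated as non-response downstream. A vectorised sketch of that branch (the function itself uses `df.apply`), using the columns from the new test CSVs:

```python
import pandas as pd

df = pd.DataFrame(
    {
        "target": [2, 7, 1],
        "status": ["Clear", "Check needed", "Clear - overridden"],
    }
)

error_values = ["Check needed"]

# Frozen state: keep a copy of the live values, then null anything still
# flagged for checking so it is imputed rather than used as a response.
df["live_target"] = df["target"].copy()
df["target"] = df["target"].where(~df["status"].isin(error_values))

# The "Check needed" row now has target NaN and live_target 7, matching
# tests/data/staging/data_cleaning/test_run_live_or_frozen_frozen_output.csv.
print(df)
```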