From 2416397b932f471a0f3683059ced41652745a834 Mon Sep 17 00:00:00 2001 From: QuanMPhm Date: Thu, 2 May 2024 16:02:22 -0400 Subject: [PATCH] Patched pandas warnings for SettingWithCopyWarnings and FutureWarnings Without the patches, running the processing script will make Pandas print 2 warnings, SettingWithCopyWarnings and FutureWarnings. The SettingWithCopyWarnings came from the fact that chained indexing was performed on a few dataframes. Since chained indexing may return a copy of the dataframe or the dataframe itself, this causes ambiguity about whether assignments to the indexed dataframes would change the original, or merely a copy. These warnings are resolved by explicitly using the copy() function to make copies of the dataframes. As for the FutureWarnings, these were caused by assigning strings to empty columns, whose values default to NaN (float). Since float is incompatible with strings, this raises a warning. This is fixed by explicitly type casting certain columns. --- process_report/process_report.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/process_report/process_report.py b/process_report/process_report.py index aa0af8c..8447e72 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -398,6 +398,7 @@ def add_institution(dataframe: pandas.DataFrame): The list of mappings are defined in `institute_map.json`. 
""" institute_map = load_institute_map() + dataframe = dataframe.astype({INSTITUTION_FIELD: "str"}) for i, row in dataframe.iterrows(): pi_name = row[PI_FIELD] if pandas.isna(pi_name): @@ -438,9 +439,9 @@ def get_project(row): else: return project_alloc[: project_alloc.rfind("-")] - BU_projects = dataframe[dataframe[INSTITUTION_FIELD] == "Boston University"] + BU_projects = dataframe[dataframe[INSTITUTION_FIELD] == "Boston University"].copy() BU_projects["Project"] = BU_projects.apply(get_project, axis=1) - BU_projects[SUBSIDY_FIELD] = 0 + BU_projects[SUBSIDY_FIELD] = Decimal(0) BU_projects = BU_projects[ [ INVOICE_DATE_FIELD, @@ -496,7 +497,7 @@ def export_lenovo(dataframe: pandas.DataFrame, output_file): SU_HOURS_FIELD, SU_TYPE_FIELD, ] - ] + ].copy() lenovo_df.rename(columns={SU_HOURS_FIELD: "SU Hours"}, inplace=True) lenovo_df.insert(len(lenovo_df.columns), "SU Charge", SU_CHARGE_MULTIPLIER)