Skip to content

Commit

Permalink
Removed project duplicates when applying BU subsidy
Browse files Browse the repository at this point in the history
  • Loading branch information
QuanMPhm committed May 7, 2024
1 parent 2b04771 commit f9ea305
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 12 deletions.
15 changes: 13 additions & 2 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,8 +452,19 @@ def get_project(row):
BALANCE_FIELD,
]
]
BU_projects = _apply_subsidy(BU_projects, subsidy_amount)
BU_projects.to_csv(output_file)

project_list = BU_projects["Project"].unique()
BU_projects_no_dup = BU_projects.drop_duplicates("Project", inplace=False)
sum_fields = [COST_FIELD, CREDIT_FIELD, BALANCE_FIELD]
for project in project_list:
project_mask = BU_projects["Project"] == project
no_dup_project_mask = BU_projects_no_dup["Project"] == project

sum_fields_sums = BU_projects[project_mask][sum_fields].sum().values
BU_projects_no_dup.loc[no_dup_project_mask, sum_fields] = sum_fields_sums

BU_projects_no_dup = _apply_subsidy(BU_projects_no_dup, subsidy_amount)
BU_projects_no_dup.to_csv(output_file)


def _apply_subsidy(dataframe, subsidy_amount):
Expand Down
49 changes: 39 additions & 10 deletions process_report/tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,27 +349,51 @@ def setUp(self):
"2024-03",
"2024-03",
"2024-03",
"2024-03",
"2024-03",
],
"Manager (PI)": ["PI1", "PI2", "PI3", "PI3", "PI4", "PI4"],
"Manager (PI)": ["PI1", "PI1", "PI2", "PI2", "PI3", "PI3", "PI4", "PI4"],
"Institution": [
"Boston University",
"Boston University",
"Harvard University",
"Boston University",
"Boston University",
"Harvard University", # Test case for non-BU PIs
"Harvard University",
"Boston University",
"Boston University",
],
"Project - Allocation": [
"ProjectA-e6413",
"ProjectA-t575e6", # Test case for project with >1 allocation
"ProjectB-fddgfygg",
"ProjectB-5t143t",
"ProjectC-t14334",
"ProjectD",
"ProjectE-test-r25135",
"ProjectD", # Test case for correctly extracting project name
"ProjectE-test-r25135", # Test case for BU PI with >1 project
"ProjectF",
],
"Cost": [1050, 100, 10000, 1000, 1050, 100],
"Credit": [1000, 100, 0, 0, 1000, 0],
"Balance": [50, 0, 10000, 1000, 50, 100],
"Cost": [1050, 500, 100, 925, 10000, 1000, 1050, 100],
"Credit": [
1000,
0,
100,
900,
0,
0,
1000,
0,
], # Test cases where PI does/doesn't already have credits
"Balance": [
50,
500,
0,
25,
10000,
1000,
50,
100,
], # Test case where subsidy does/doesn't fully cover the balance
}
self.dataframe = pandas.DataFrame(data)
output_file = tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".csv")
Expand Down Expand Up @@ -411,12 +435,17 @@ def test_apply_BU_subsidy(self):
)

self.assertEqual(4, len(output_df.index))
self.assertEqual(50, output_df.loc[0, "Subsidy"])
self.assertEqual(0, output_df.loc[1, "Subsidy"])
self.assertEqual(1550, output_df.loc[0, "Cost"])
self.assertEqual(1025, output_df.loc[1, "Cost"])
self.assertEqual(1050, output_df.loc[2, "Cost"])
self.assertEqual(100, output_df.loc[3, "Cost"])

self.assertEqual(100, output_df.loc[0, "Subsidy"])
self.assertEqual(25, output_df.loc[1, "Subsidy"])
self.assertEqual(50, output_df.loc[2, "Subsidy"])
self.assertEqual(50, output_df.loc[3, "Subsidy"])

self.assertEqual(0, output_df.loc[0, "Balance"])
self.assertEqual(450, output_df.loc[0, "Balance"])
self.assertEqual(0, output_df.loc[1, "Balance"])
self.assertEqual(0, output_df.loc[2, "Balance"])
self.assertEqual(50, output_df.loc[3, "Balance"])
Expand Down

0 comments on commit f9ea305

Please sign in to comment.