-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* creating "create population count functions" * change sampled to is_sampled and is_census * Create unit test, remove script entry point * Adding docstrings * Adding optional functionality to save output instead of returning * Renaming df's and implementing in apply_estimation * Update mbs_results/estimation/apply_estimation.py Co-authored-by: Wil Roberts <[email protected]> * Update mbs_results/estimation/apply_estimation.py Co-authored-by: Wil Roberts <[email protected]> * Update tests/estimation/test_create_population_counts.py Co-authored-by: Wil Roberts <[email protected]> * Update create_population_counts.py * Formatting comments --------- Co-authored-by: Wil Roberts <[email protected]> Co-authored-by: Wil Roberts <[email protected]>
- Loading branch information
1 parent
6c2529d
commit 23a75ff
Showing
21 changed files
with
295 additions
and
142 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
import os
from typing import Optional

import pandas as pd
|
||
|
||
def calculate_turnover_sum_count(
    df: pd.DataFrame, period: str, strata: str, colname: str, **config
) -> pd.DataFrame:
    """
    Aggregate turnover sum and row count per period and strata.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe containing the ``frotover`` and ``reference`` columns as
        well as the columns named by ``period`` and ``strata``.
    period : str
        Period column name (first grouping key).
    strata : str
        Strata column name (second grouping key).
    colname : str
        Prefix for the output columns, which are named
        ``{colname}_turnover_sum`` and ``{colname}_count``.
    **config
        Accepted so a whole config dictionary can be unpacked into the
        call; not used by this function.

    Returns
    -------
    pd.DataFrame
        One row per (period, strata) group holding the sum of ``frotover``
        and the number of rows in the group, with the grouping keys
        restored as ordinary columns.
    """
    group_keys = [period, strata]

    # "size" counts every row of the group, whatever "reference" contains.
    totals = df.groupby(group_keys).agg(
        summing=("frotover", "sum"),
        count=("reference", "size"),
    )

    prefixed_names = {
        "summing": f"{colname}_turnover_sum",
        "count": f"{colname}_count",
    }

    return totals.reset_index().rename(columns=prefixed_names)
|
||
|
||
def create_population_count_output(
    df: pd.DataFrame,
    period: str,
    strata: str,
    output_path: str = "",
    save_output: bool = False,
    **config: dict,
) -> Optional[pd.DataFrame]:
    """
    Create the population count output.

    Turnover sums and row counts are calculated per period and strata,
    once over all rows of ``df`` (population) and once over the rows where
    ``is_sampled`` is true (sample), and the two results are merged.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe containing ``frotover``, ``reference`` and ``is_sampled``
        plus the columns named by ``period`` and ``strata``.
    period : str
        Period column name.
    strata : str
        Strata column name.
    output_path : str, optional
        Directory in which ``population_counts.csv`` is written when
        ``save_output`` is True. A trailing path separator is optional.
    save_output : bool, optional
        Default False. If True, saves the output under ``output_path``
        instead of returning it.
    **config : dict
        Remaining configuration values, forwarded to
        ``calculate_turnover_sum_count``.

    Returns
    -------
    pd.DataFrame or None
        Dataframe with ``population_turnover_sum``, ``population_count``,
        ``sample_turnover_sum`` and ``sample_count`` per period and strata.
        None if ``save_output`` is True.

    Notes
    -----
    ``pd.merge`` performs an inner join by default, so a period/strata
    group without any sampled rows does not appear in the output.
    """
    df_population = calculate_turnover_sum_count(
        df, period, strata, colname="population", **config
    )

    df_sampled = calculate_turnover_sum_count(
        df.loc[df["is_sampled"]], period, strata, colname="sample", **config
    )

    combined = pd.merge(df_population, df_sampled, on=[period, strata])

    if not save_output:
        return combined

    # Join rather than concatenate: a directory given without a trailing
    # separator would otherwise be fused with the file name.
    combined.to_csv(os.path.join(output_path, "population_counts.csv"), index=False)
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
8 changes: 4 additions & 4 deletions
8
tests/data/estimation/pre_processing_estimation/derive_estimation_variables.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
reference,cell_no,auxiliary,period,sampled,calibration_group | ||
11111111111,123456,1111111111111,202401,1,123456 | ||
22222222222,234567,2222222222222,202401,1,123456 | ||
33333333333,345678,3333333333333,202401,0,345678 | ||
reference,cell_no,auxiliary,period,is_sampled,calibration_group | ||
11111111111,123456,1111111111111,202401,True,123456 | ||
22222222222,234567,2222222222222,202401,True,123456 | ||
33333333333,345678,3333333333333,202401,False,345678 |
30 changes: 15 additions & 15 deletions
30
tests/data/outlier_detection/calculate_predicted_unit_value/predicted_unit_value_data.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,15 @@ | ||
group,period,aux,sampled,a_weight,g_weight,target_variable,nw_ag_flag | ||
101,202401,10,0,1.666666667,1.023809524,12,False | ||
101,202401,23,1,1.666666667,1.023809524,20,False | ||
101,202401,41,1,1.666666667,1.023809524,20,False | ||
101,202402,53,1,1.666666667,1.023809524,40,False | ||
101,202401,12,0,1.666666667,1.023809524,10,False | ||
102,202401,50,1,2.5,1.023809524,60,False | ||
102,202402,40,1,2.5,1.023809524,50,False | ||
102,202401,45,0,2.5,1.023809524,50,False | ||
102,202401,70,0,2.5,1.023809524,60,False | ||
102,202401,86,0,2.5,1.023809524,90,False | ||
103,202401,20,0,0.32,0.004,90,True | ||
103,202401,30,0,0.32,0.004,90,True | ||
104,202401,20,0,,0.004,90,False | ||
104,202401,30,0,,0.004,90,False | ||
group,period,aux,is_census,a_weight,g_weight,target_variable,nw_ag_flag | ||
101,202401,10,True,1.666666667,1.023809524,12,False | ||
101,202401,23,False,1.666666667,1.023809524,20,False | ||
101,202401,41,False,1.666666667,1.023809524,20,False | ||
101,202402,53,False,1.666666667,1.023809524,40,False | ||
101,202401,12,True,1.666666667,1.023809524,10,False | ||
102,202401,50,False,2.5,1.023809524,60,False | ||
102,202402,40,False,2.5,1.023809524,50,False | ||
102,202401,45,True,2.5,1.023809524,50,False | ||
102,202401,70,True,2.5,1.023809524,60,False | ||
102,202401,86,True,2.5,1.023809524,90,False | ||
103,202401,20,True,0.32,0.004,90,True | ||
103,202401,30,True,0.32,0.004,90,True | ||
104,202401,20,True,,0.004,90,False | ||
104,202401,30,True,,0.004,90,False |
30 changes: 15 additions & 15 deletions
30
tests/data/outlier_detection/calculate_predicted_unit_value/predicted_unit_value_output.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,15 @@ | ||
group,period,aux,sampled,a_weight,g_weight,target_variable,nw_ag_flag,predicted_unit_value | ||
101,202401,10,0,1.666666667,1.023809524,12,False, | ||
101,202401,23,1,1.666666667,1.023809524,20,False,14.375 | ||
101,202401,41,1,1.666666667,1.023809524,20,False,25.625 | ||
101,202402,53,1,1.666666667,1.023809524,40,False,40 | ||
101,202401,12,0,1.666666667,1.023809524,10,False, | ||
102,202401,50,1,2.5,1.023809524,60,False,60 | ||
102,202402,40,1,2.5,1.023809524,50,False,50 | ||
102,202401,45,0,2.5,1.023809524,50,False, | ||
102,202401,70,0,2.5,1.023809524,60,False, | ||
102,202401,86,0,2.5,1.023809524,90,False, | ||
103,202401,20,0,0.32,0.004,90,True, | ||
103,202401,30,0,0.32,0.004,90,True, | ||
104,202401,20,0,,0.004,90,False, | ||
104,202401,30,0,,0.004,90,False, | ||
group,period,aux,is_census,a_weight,g_weight,target_variable,nw_ag_flag,predicted_unit_value | ||
101,202401,10,True,1.666666667,1.023809524,12,False, | ||
101,202401,23,False,1.666666667,1.023809524,20,False,14.375 | ||
101,202401,41,False,1.666666667,1.023809524,20,False,25.625 | ||
101,202402,53,False,1.666666667,1.023809524,40,False,40 | ||
101,202401,12,True,1.666666667,1.023809524,10,False, | ||
102,202401,50,False,2.5,1.023809524,60,False,60 | ||
102,202402,40,False,2.5,1.023809524,50,False,50 | ||
102,202401,45,True,2.5,1.023809524,50,False, | ||
102,202401,70,True,2.5,1.023809524,60,False, | ||
102,202401,86,True,2.5,1.023809524,90,False, | ||
103,202401,20,True,0.32,0.004,90,True, | ||
103,202401,30,True,0.32,0.004,90,True, | ||
104,202401,20,True,,0.004,90,False, | ||
104,202401,30,True,,0.004,90,False, |
26 changes: 13 additions & 13 deletions
26
tests/data/outlier_detection/calculate_ratio_estimation/ratio_estimation_data.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,13 @@ | ||
group,period,aux,sampled,a_weight,g_weight,target_variable,nw_ag_flag,predicted_unit_value,l_value | ||
101,202401,10,0,1.666666667,1.023809524,12,False,, | ||
101,202401,23,1,1.666666667,1.023809524,20,False,14.375,0.5 | ||
101,202401,41,1,1.666666667,1.023809524,20,False,25.625,0.5 | ||
101,202402,53,1,1.666666667,1.023809524,40,False,40,0.5 | ||
101,202401,12,0,1.666666667,1.023809524,10,False,, | ||
102,202401,50,1,2.5,1.023809524,60,False,60,0.5 | ||
102,202402,40,1,2.5,1.023809524,50,False,50,0.5 | ||
102,202401,45,0,2.5,1.023809524,50,False,, | ||
102,202401,70,0,2.5,1.023809524,60,False,, | ||
102,202401,86,0,2.5,1.023809524,90,False,, | ||
104,202401,20,0,,0.004,90,False,, | ||
104,202401,30,0,,0.004,90,False,, | ||
group,period,aux,is_census,a_weight,g_weight,target_variable,nw_ag_flag,predicted_unit_value,l_value | ||
101,202401,10,True,1.666666667,1.023809524,12,False,, | ||
101,202401,23,False,1.666666667,1.023809524,20,False,14.375,0.5 | ||
101,202401,41,False,1.666666667,1.023809524,20,False,25.625,0.5 | ||
101,202402,53,False,1.666666667,1.023809524,40,False,40,0.5 | ||
101,202401,12,True,1.666666667,1.023809524,10,False,, | ||
102,202401,50,False,2.5,1.023809524,60,False,60,0.5 | ||
102,202402,40,False,2.5,1.023809524,50,False,50,0.5 | ||
102,202401,45,True,2.5,1.023809524,50,False,, | ||
102,202401,70,True,2.5,1.023809524,60,False,, | ||
102,202401,86,True,2.5,1.023809524,90,False,, | ||
104,202401,20,True,,0.004,90,False,, | ||
104,202401,30,True,,0.004,90,False,, |
26 changes: 13 additions & 13 deletions
26
tests/data/outlier_detection/calculate_ratio_estimation/ratio_estimation_data_output.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,13 @@ | ||
group,period,aux,sampled,a_weight,g_weight,target_variable,nw_ag_flag,predicted_unit_value,l_value,ratio_estimation_treshold | ||
101,202401,10,0,1.666666667,1.023809524,12,False,,, | ||
101,202401,23,1,1.666666667,1.023809524,20,False,14.375,0.5,15.0828652 | ||
101,202401,41,1,1.666666667,1.023809524,20,False,25.625,0.5,26.3328652 | ||
101,202402,53,1,1.666666667,1.023809524,40,False,40,0.5,40.7078652 | ||
101,202401,12,0,1.666666667,1.023809524,10,False,,, | ||
102,202401,50,1,2.5,1.023809524,60,False,60,0.5,60.3206107 | ||
102,202402,40,1,2.5,1.023809524,50,False,50,0.5,50.3206107 | ||
102,202401,45,0,2.5,1.023809524,50,False,,, | ||
102,202401,70,0,2.5,1.023809524,60,False,,, | ||
102,202401,86,0,2.5,1.023809524,90,False,,, | ||
104,202401,20,0,,0.004,90,False,,, | ||
104,202401,30,0,,0.004,90,False,,, | ||
group,period,aux,is_census,a_weight,g_weight,target_variable,nw_ag_flag,predicted_unit_value,l_value,ratio_estimation_treshold | ||
101,202401,10,True,1.666666667,1.023809524,12,False,,, | ||
101,202401,23,False,1.666666667,1.023809524,20,False,14.375,0.5,15.0828652 | ||
101,202401,41,False,1.666666667,1.023809524,20,False,25.625,0.5,26.3328652 | ||
101,202402,53,False,1.666666667,1.023809524,40,False,40,0.5,40.7078652 | ||
101,202401,12,True,1.666666667,1.023809524,10,False,,, | ||
102,202401,50,False,2.5,1.023809524,60,False,60,0.5,60.3206107 | ||
102,202402,40,False,2.5,1.023809524,50,False,50,0.5,50.3206107 | ||
102,202401,45,True,2.5,1.023809524,50,False,,, | ||
102,202401,70,True,2.5,1.023809524,60,False,,, | ||
102,202401,86,True,2.5,1.023809524,90,False,,, | ||
104,202401,20,True,,0.004,90,False,,, | ||
104,202401,30,True,,0.004,90,False,,, |
Oops, something went wrong.