From 1883ab0fdea8f0f2499cfe9e09107933476cbe9f Mon Sep 17 00:00:00 2001 From: andretocci Date: Mon, 12 Apr 2021 16:54:17 -0300 Subject: [PATCH 1/5] test: :white_check_mark: create test_mam for future pytest --- tests/test_mam.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/test_mam.py diff --git a/tests/test_mam.py b/tests/test_mam.py new file mode 100644 index 0000000..e69de29 From 8f56835dfd52f70357fe02d82ecdf694b4f650f0 Mon Sep 17 00:00:00 2001 From: andretocci Date: Mon, 12 Apr 2021 16:54:17 -0300 Subject: [PATCH 2/5] test: :white_check_mark: create test_mam for future pytest --- tests/test_mam.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/test_mam.py diff --git a/tests/test_mam.py b/tests/test_mam.py new file mode 100644 index 0000000..e69de29 From 882abc22b1b8f28b9d5b13df85a7890d19b615b4 Mon Sep 17 00:00:00 2001 From: andretocci Date: Fri, 23 Apr 2021 09:23:48 -0300 Subject: [PATCH 3/5] test: :sparkles: pytest implementation --- tests/test_mam.py | 103 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/tests/test_mam.py b/tests/test_mam.py index e69de29..af86afb 100644 --- a/tests/test_mam.py +++ b/tests/test_mam.py @@ -0,0 +1,103 @@ +from marketing_attribution_models import MAM +import pandas as pd +import pytest + +att = None +df_agg = None +conv_value = None +df_journey = None + +def setup_module(module): + global df_agg, att, conv_value, df_journey + conv_value = 3 + df_agg = pd.DataFrame( + {'channels_agg': ["A", "A > B", + "A > B > C", "B", + "B > A","B > C > A", + "C","C > A", + "C > B > A"], + 'conversion_value': [conv_value for i in range(9)]}) + att = MAM(df_agg, + conversion_value='conversion_value', + channels_colname='channels_agg') + + att.attribution_first_click() + att.attribution_last_click() + att.attribution_last_click_non('A') + att.attribution_linear() + att.attribution_position_based() + 
att.attribution_time_decay(decay_over_time=0.5, frequency=1) + att.attribution_markov() + att.attribution_shapley() + df_journey = att.as_pd_dataframe() + + +def test_journey_results(): + results = [] + df_journey_test = df_journey.copy() + df_journey_test['size'] = df_journey_test[ 'channels_agg'].str.split(' > ').apply(len) + for col in [col for col in df_journey_test.columns if col not in ['channels_agg', 'converted_agg', 'conversion_value', 'size']]: + df_journey_test[col] = df_journey_test[ col].str.split(' > ').apply(len) + results.append(all((df_journey_test[ col] == df_journey_test[ 'size']).values)) + assert all(results) + + +def test_agg_results(): + res_value = df_agg['conversion_value'].sum() + model_results_df = att.group_by_channels_models + model_results_df = model_results_df[[col for col in model_results_df.columns if col != 'channels']] + assert all((model_results_df.sum().round() == res_value).values) + +def test_att_first(): + colname = 'attribution_first_click_heuristic' + df_journey_test = df_journey.copy() + assert all(df_journey_test[ colname].str.split(' > ').apply(lambda x: float(x[0]) == conv_value).values) + + +def test_att_last(): + colname = 'attribution_last_click_heuristic' + df_journey_test = df_journey.copy() + assert all(df_journey_test[ colname].str.split(' > ').apply(lambda x: float(x[-1]) == conv_value).values) + + +def test_att_last_non(): + colname = 'attribution_last_click_non_A_heuristic' + df_journey_test = df_journey.copy() + non_list = ['B > C > A', 'C > A', 'C > B > A'] + df_journey_test = df_journey_test[ df_journey_test[ 'channels_agg'].apply(lambda x: x in non_list)] + assert all(df_journey_test[colname].str.split(' > ').apply(lambda x: float(x[-1]) == 0).values) + + +def test_att_linear(): + colname = 'attribution_linear_heuristic' + df_journey_test = df_journey.copy() + assert all(df_journey_test[ colname].str.split(' > ').apply(lambda x: float(x[0]) == (conv_value / len(x))).values) + + +def 
test_att_position_based(): + colname = 'attribution_position_based_0.4_0.2_0.4_heuristic' + df_journey_test = df_journey.copy() + df_test = df_journey_test[ df_journey_test[ 'channels_agg'].apply(lambda x: len(x) == 3)] + assert all(df_test[colname].str.split(' > ').apply(lambda x: float(x[0]) == (conv_value * 0.4)).values) + df_test = df_journey_test[ df_journey_test[ 'channels_agg'].apply(lambda x: len(x) == 2)] + assert all(df_test[colname].str.split(' > ').apply(lambda x: float(x[0]) == (conv_value * 0.5)).values) + + +# def test_att_time(): +# colname = 'attribution_time_decay0.5_freq1_heuristic' + + +# def test_att_markov(): +# colname = 'attribution_time_decay0.5_freq1_heuristic' + + +# def test_att_shapley(): +# colname = 'attribution_time_decay0.5_freq1_heuristic' + +if __name__ == '__main__': + setup_module(None) + print(df_journey) + colname = 'attribution_first_click_heuristic' + df_journey_test = df_journey.copy() + print(df_journey_test[ colname].str.split(' > ').apply(lambda x: float(x[0])).values) + print(all(df_journey_test[ colname].str.split(' > ').apply(lambda x: float(x[0]) == conv_value).values)) \ No newline at end of file From 2bbe9e6330f66822a160e2829cca50feb926c2d9 Mon Sep 17 00:00:00 2001 From: andretocci Date: Mon, 26 Apr 2021 17:27:24 -0300 Subject: [PATCH 4/5] style: :art: Black formatter --- tests/test_mam.py | 139 +++++++++++++++++++++++++++++++++------------- 1 file changed, 100 insertions(+), 39 deletions(-) diff --git a/tests/test_mam.py b/tests/test_mam.py index af86afb..8b65a61 100644 --- a/tests/test_mam.py +++ b/tests/test_mam.py @@ -1,29 +1,38 @@ from marketing_attribution_models import MAM -import pandas as pd -import pytest +import pandas as pd att = None df_agg = None conv_value = None df_journey = None -def setup_module(module): + +def setup_module(): global df_agg, att, conv_value, df_journey conv_value = 3 df_agg = pd.DataFrame( - {'channels_agg': ["A", "A > B", - "A > B > C", "B", - "B > A","B > C > A", - "C","C > A", 
- "C > B > A"], - 'conversion_value': [conv_value for i in range(9)]}) - att = MAM(df_agg, - conversion_value='conversion_value', - channels_colname='channels_agg') - + { + "channels_agg": [ + "A", + "A > B", + "A > B > C", + "B", + "B > A", + "B > C > A", + "C", + "C > A", + "C > B > A", + ], + "conversion_value": [conv_value for i in range(9)], + } + ) + att = MAM( + df_agg, conversion_value="conversion_value", channels_colname="channels_agg" + ) + att.attribution_first_click() att.attribution_last_click() - att.attribution_last_click_non('A') + att.attribution_last_click_non("A") att.attribution_linear() att.attribution_position_based() att.attribution_time_decay(decay_over_time=0.5, frequency=1) @@ -35,52 +44,97 @@ def setup_module(module): def test_journey_results(): results = [] df_journey_test = df_journey.copy() - df_journey_test['size'] = df_journey_test[ 'channels_agg'].str.split(' > ').apply(len) - for col in [col for col in df_journey_test.columns if col not in ['channels_agg', 'converted_agg', 'conversion_value', 'size']]: - df_journey_test[col] = df_journey_test[ col].str.split(' > ').apply(len) - results.append(all((df_journey_test[ col] == df_journey_test[ 'size']).values)) + df_journey_test["size"] = ( + df_journey_test["channels_agg"].str.split(" > ").apply(len) + ) + for col in [ + col + for col in df_journey_test.columns + if col not in ["channels_agg", "converted_agg", "conversion_value", "size"] + ]: + df_journey_test[col] = df_journey_test[col].str.split(" > ").apply(len) + results.append(all((df_journey_test[col] == df_journey_test["size"]).values)) assert all(results) def test_agg_results(): - res_value = df_agg['conversion_value'].sum() + res_value = df_agg["conversion_value"].sum() model_results_df = att.group_by_channels_models - model_results_df = model_results_df[[col for col in model_results_df.columns if col != 'channels']] + model_results_df = model_results_df[ + [col for col in model_results_df.columns if col != "channels"] + ] 
assert all((model_results_df.sum().round() == res_value).values) + def test_att_first(): - colname = 'attribution_first_click_heuristic' + colname = "attribution_first_click_heuristic" df_journey_test = df_journey.copy() - assert all(df_journey_test[ colname].str.split(' > ').apply(lambda x: float(x[0]) == conv_value).values) + assert all( + df_journey_test[colname] + .str.split(" > ") + .apply(lambda x: float(x[0]) == conv_value) + .values + ) def test_att_last(): - colname = 'attribution_last_click_heuristic' + colname = "attribution_last_click_heuristic" df_journey_test = df_journey.copy() - assert all(df_journey_test[ colname].str.split(' > ').apply(lambda x: float(x[-1]) == conv_value).values) + assert all( + df_journey_test[colname] + .str.split(" > ") + .apply(lambda x: float(x[-1]) == conv_value) + .values + ) def test_att_last_non(): - colname = 'attribution_last_click_non_A_heuristic' + colname = "attribution_last_click_non_A_heuristic" df_journey_test = df_journey.copy() - non_list = ['B > C > A', 'C > A', 'C > B > A'] - df_journey_test = df_journey_test[ df_journey_test[ 'channels_agg'].apply(lambda x: x in non_list)] - assert all(df_journey_test[colname].str.split(' > ').apply(lambda x: float(x[-1]) == 0).values) + non_list = ["B > C > A", "C > A", "C > B > A"] + df_journey_test = df_journey_test[ + df_journey_test["channels_agg"].apply(lambda x: x in non_list) + ] + assert all( + df_journey_test[colname] + .str.split(" > ") + .apply(lambda x: float(x[-1]) == 0) + .values + ) def test_att_linear(): - colname = 'attribution_linear_heuristic' + colname = "attribution_linear_heuristic" df_journey_test = df_journey.copy() - assert all(df_journey_test[ colname].str.split(' > ').apply(lambda x: float(x[0]) == (conv_value / len(x))).values) + assert all( + df_journey_test[colname] + .str.split(" > ") + .apply(lambda x: float(x[0]) == (conv_value / len(x))) + .values + ) def test_att_position_based(): - colname = 
'attribution_position_based_0.4_0.2_0.4_heuristic' + colname = "attribution_position_based_0.4_0.2_0.4_heuristic" df_journey_test = df_journey.copy() - df_test = df_journey_test[ df_journey_test[ 'channels_agg'].apply(lambda x: len(x) == 3)] - assert all(df_test[colname].str.split(' > ').apply(lambda x: float(x[0]) == (conv_value * 0.4)).values) - df_test = df_journey_test[ df_journey_test[ 'channels_agg'].apply(lambda x: len(x) == 2)] - assert all(df_test[colname].str.split(' > ').apply(lambda x: float(x[0]) == (conv_value * 0.5)).values) + df_test = df_journey_test[ + df_journey_test["channels_agg"].apply(lambda x: len(x) == 3) + ] + assert all( + df_test[colname] + .str.split(" > ") + .apply(lambda x: float(x[0]) == (conv_value * 0.4)) + .values + ) + df_test = df_journey_test[ + df_journey_test["channels_agg"].apply(lambda x: len(x) == 2) + ] + assert all( + df_test[colname] + .str.split(" > ") + .apply(lambda x: float(x[0]) == (conv_value * 0.5)) + .values + ) # def test_att_time(): @@ -94,10 +148,17 @@ def test_att_position_based(): # def test_att_shapley(): # colname = 'attribution_time_decay0.5_freq1_heuristic' -if __name__ == '__main__': +if __name__ == "__main__": setup_module(None) print(df_journey) - colname = 'attribution_first_click_heuristic' + colname = "attribution_first_click_heuristic" df_journey_test = df_journey.copy() - print(df_journey_test[ colname].str.split(' > ').apply(lambda x: float(x[0])).values) - print(all(df_journey_test[ colname].str.split(' > ').apply(lambda x: float(x[0]) == conv_value).values)) \ No newline at end of file + print(df_journey_test[colname].str.split(" > ").apply(lambda x: float(x[0])).values) + print( + all( + df_journey_test[colname] + .str.split(" > ") + .apply(lambda x: float(x[0]) == conv_value) + .values + ) + ) From e270474f1461c8b4123aa08d9ae3d1cf92449829 Mon Sep 17 00:00:00 2001 From: andretocci Date: Tue, 18 May 2021 15:17:02 -0300 Subject: [PATCH 5/5] style: :art: pylint on pytest --- tests/test_mam.py | 
126 +++++++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 52 deletions(-) diff --git a/tests/test_mam.py b/tests/test_mam.py index 8b65a61..2b2815b 100644 --- a/tests/test_mam.py +++ b/tests/test_mam.py @@ -1,16 +1,20 @@ -from marketing_attribution_models import MAM import pandas as pd +from marketing_attribution_models import MAM -att = None -df_agg = None -conv_value = None -df_journey = None +ATT = None +DF_AGG = None +CONV_VALUE = None +DF_JOURNEY = None def setup_module(): - global df_agg, att, conv_value, df_journey - conv_value = 3 - df_agg = pd.DataFrame( + """ + Setup module that will create a MAM object and + will run attribution models for testing. + """ + global DF_AGG, ATT, CONV_VALUE, DF_JOURNEY + CONV_VALUE = 3 + DF_AGG = pd.DataFrame( { "channels_agg": [ "A", @@ -23,30 +27,37 @@ def setup_module(): "C > A", "C > B > A", ], - "conversion_value": [conv_value for i in range(9)], + "conversion_value": [CONV_VALUE for i in range(9)], } ) - att = MAM( - df_agg, conversion_value="conversion_value", channels_colname="channels_agg" + ATT = MAM( + DF_AGG, conversion_value="conversion_value", channels_colname="channels_agg" ) - att.attribution_first_click() - att.attribution_last_click() - att.attribution_last_click_non("A") - att.attribution_linear() - att.attribution_position_based() - att.attribution_time_decay(decay_over_time=0.5, frequency=1) - att.attribution_markov() - att.attribution_shapley() - df_journey = att.as_pd_dataframe() - - -def test_journey_results(): - results = [] - df_journey_test = df_journey.copy() + ATT.attribution_first_click() + ATT.attribution_last_click() + ATT.attribution_last_click_non("A") + ATT.attribution_linear() + ATT.attribution_position_based() + ATT.attribution_time_decay(decay_over_time=0.5, frequency=1) + ATT.attribution_markov() + ATT.attribution_shapley() + DF_JOURNEY = ATT.as_pd_dataframe() + + +def test_as_pd_dataframe_len(): + """ + Test function that will check if the len returned + on 
self.as_pd_dataframe() results will be the same + len as the number of channels + """ + results = [] # Results variable + df_journey_test = DF_JOURNEY.copy() df_journey_test["size"] = ( df_journey_test["channels_agg"].str.split(" > ").apply(len) ) + + # For loop on model results columns for col in [ col for col in df_journey_test.columns @@ -58,8 +69,14 @@ def test_journey_results(): def test_agg_results(): - res_value = df_agg["conversion_value"].sum() - model_results_df = att.group_by_channels_models + """ + Test function that will check if the sum of + the model results are the same as the total + of conversions present when creating the MAM + object. + """ + res_value = DF_AGG["conversion_value"].sum() + model_results_df = ATT.group_by_channels_models model_results_df = model_results_df[ [col for col in model_results_df.columns if col != "channels"] ] @@ -67,30 +84,42 @@ def test_agg_results(): def test_att_first(): + """ + Test function to validate the first click method + results. + """ colname = "attribution_first_click_heuristic" - df_journey_test = df_journey.copy() + df_journey_test = DF_JOURNEY.copy() assert all( df_journey_test[colname] .str.split(" > ") - .apply(lambda x: float(x[0]) == conv_value) + .apply(lambda x: float(x[0]) == CONV_VALUE) .values ) def test_att_last(): + """ + Test function to validate the last click method + results. + """ colname = "attribution_last_click_heuristic" - df_journey_test = df_journey.copy() + df_journey_test = DF_JOURNEY.copy() assert all( df_journey_test[colname] .str.split(" > ") - .apply(lambda x: float(x[-1]) == conv_value) + .apply(lambda x: float(x[-1]) == CONV_VALUE) .values ) def test_att_last_non(): + """ + Test function to validate the last click non method + results. 
+ """ colname = "attribution_last_click_non_A_heuristic" - df_journey_test = df_journey.copy() + df_journey_test = DF_JOURNEY.copy() non_list = ["B > C > A", "C > A", "C > B > A"] df_journey_test = df_journey_test[ df_journey_test["channels_agg"].apply(lambda x: x in non_list) @@ -104,26 +133,34 @@ def test_att_last_non(): def test_att_linear(): + """ + Test function to validate the linear method + results. + """ colname = "attribution_linear_heuristic" - df_journey_test = df_journey.copy() + df_journey_test = DF_JOURNEY.copy() assert all( df_journey_test[colname] .str.split(" > ") - .apply(lambda x: float(x[0]) == (conv_value / len(x))) + .apply(lambda x: float(x[0]) == (CONV_VALUE / len(x))) .values ) def test_att_position_based(): + """ + Test function to validate the position based method + results. + """ colname = "attribution_position_based_0.4_0.2_0.4_heuristic" - df_journey_test = df_journey.copy() + df_journey_test = DF_JOURNEY.copy() df_test = df_journey_test[ df_journey_test["channels_agg"].apply(lambda x: len(x) == 3) ] assert all( df_test[colname] .str.split(" > ") - .apply(lambda x: float(x[0]) == (conv_value * 0.4)) + .apply(lambda x: float(x[0]) == (CONV_VALUE * 0.4)) .values ) df_test = df_journey_test[ @@ -132,7 +169,7 @@ def test_att_position_based(): assert all( df_test[colname] .str.split(" > ") - .apply(lambda x: float(x[0]) == (conv_value * 0.5)) + .apply(lambda x: float(x[0]) == (CONV_VALUE * 0.5)) .values ) @@ -147,18 +184,3 @@ def test_att_position_based(): # def test_att_shapley(): # colname = 'attribution_time_decay0.5_freq1_heuristic' - -if __name__ == "__main__": - setup_module(None) - print(df_journey) - colname = "attribution_first_click_heuristic" - df_journey_test = df_journey.copy() - print(df_journey_test[colname].str.split(" > ").apply(lambda x: float(x[0])).values) - print( - all( - df_journey_test[colname] - .str.split(" > ") - .apply(lambda x: float(x[0]) == conv_value) - .values - ) - )