From 1f872e81fdfece7ac86b96b543adfd18d4c682be Mon Sep 17 00:00:00 2001 From: Abby Wheelis Date: Mon, 22 Jul 2024 12:04:53 -0600 Subject: [PATCH 1/5] updates to land modes switching the condition from the display mode to the underlying mode --- viz_scripts/generic_metrics.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index 6ced8fc..f93de0d 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -419,7 +419,7 @@ "try:\n", " ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n", " ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n", - " labeled_land_trips_df = expanded_ct[expanded_ct['Mode_confirm'] != \"Airplane\"] if \"Mode_confirm\" in expanded_ct.columns else None\n", + " labeled_land_trips_df = expanded_ct[expanded_ct['mode_confirm'] != \"air\"] if \"mode_confirm\" in expanded_ct.columns else None\n", " sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n", " \n", " sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n", From 08f5b2f2ed447216caef6c519f78a106b2796bd6 Mon Sep 17 00:00:00 2001 From: Abby Wheelis Date: Fri, 6 Sep 2024 10:49:03 -0600 Subject: [PATCH 2/5] set nan sensed summaries as "UNKNOWN" --- viz_scripts/scaffolding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index e2abc57..53189b7 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -239,7 +239,7 @@ def load_viz_notebook_sensor_inference_data(year, month, program, include_test_u expanded_ct = participant_ct_df 
print(f"Loaded expanded_ct with length {len(expanded_ct)} for {tq}") if len(expanded_ct) > 0: - expanded_ct["primary_mode_non_other"] = participant_ct_df.cleaned_section_summary.apply(lambda md: max(md["distance"], key=md["distance"].get)) + expanded_ct["primary_mode_non_other"] = participant_ct_df.cleaned_section_summary.apply(lambda md: max(md["distance"], key=md["distance"].get) if not isinstance(md, float) else "UNKNOWN") expanded_ct.primary_mode_non_other.replace({"ON_FOOT": "WALKING"}, inplace=True) valid_sensed_modes = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN"] expanded_ct["primary_mode"] = expanded_ct.primary_mode_non_other.apply(lambda pm: "OTHER" if pm not in valid_sensed_modes else pm) From 59debf178d6622219f6e06504172da8be87f7ca2 Mon Sep 17 00:00:00 2001 From: Abby Wheelis Date: Fri, 6 Sep 2024 14:45:56 -0600 Subject: [PATCH 3/5] add air-> Airplane to the csvs --- viz_scripts/auxiliary_files/mode_labels.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/viz_scripts/auxiliary_files/mode_labels.csv b/viz_scripts/auxiliary_files/mode_labels.csv index 9397d75..551d4ee 100644 --- a/viz_scripts/auxiliary_files/mode_labels.csv +++ b/viz_scripts/auxiliary_files/mode_labels.csv @@ -30,3 +30,4 @@ the_friend who drives us to work was running errands after the shift before drop not_a_trip,not_a_trip,Not a Trip no_travel,,No Travel same_mode,,Same Mode +air,,Airplane From aac5d72bd514772d58cb6ff6b5e2e990e124ead3 Mon Sep 17 00:00:00 2001 From: Abby Wheelis Date: Mon, 9 Sep 2024 10:59:52 -0600 Subject: [PATCH 4/5] use the same lambda function as the admin dash check more carefully for missing summaries --- viz_scripts/scaffolding.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index 53189b7..f45594f 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -238,8 +238,26 @@ def load_viz_notebook_sensor_inference_data(year, month, 
program, include_test_u participant_ct_df = load_all_participant_trips(program, tq, include_test_users) expanded_ct = participant_ct_df print(f"Loaded expanded_ct with length {len(expanded_ct)} for {tq}") + + #TODO-this is also in the admin dash, can we unify? + get_max_mode_from_summary = lambda md: ( + "UNKNOWN" + if not isinstance(md, dict) + or "distance" not in md + or not isinstance(md["distance"], dict) + # If 'md' is a dictionary and 'distance' is a valid key pointing to a dictionary: + else ( + # Get the maximum value from 'md["distance"]' using the values of 'md["distance"].get' as the key for 'max'. + # This operation only happens if the length of 'md["distance"]' is greater than 0. + # Otherwise, return "INVALID". + max(md["distance"], key=md["distance"].get) + if len(md["distance"]) > 0 + else "UNKNOWN" + ) + ) + if len(expanded_ct) > 0: - expanded_ct["primary_mode_non_other"] = participant_ct_df.cleaned_section_summary.apply(lambda md: max(md["distance"], key=md["distance"].get) if not isinstance(md, float) else "UNKNOWN") + expanded_ct["primary_mode_non_other"] = participant_ct_df.cleaned_section_summary.apply(get_max_mode_from_summary) expanded_ct.primary_mode_non_other.replace({"ON_FOOT": "WALKING"}, inplace=True) valid_sensed_modes = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN"] expanded_ct["primary_mode"] = expanded_ct.primary_mode_non_other.apply(lambda pm: "OTHER" if pm not in valid_sensed_modes else pm) From 0809f2e8762ce83652f0d02d43db5e3dacfb3674 Mon Sep 17 00:00:00 2001 From: Abby Wheelis Date: Tue, 10 Sep 2024 16:35:33 -0600 Subject: [PATCH 5/5] invalid data is INVALID, sensed other as OTHER not Other --- viz_scripts/plots.py | 39 +++++++++++++++++++++++++++----------- viz_scripts/scaffolding.py | 8 ++++---- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/viz_scripts/plots.py b/viz_scripts/plots.py index e7a9742..9879ba8 100644 --- a/viz_scripts/plots.py +++ b/viz_scripts/plots.py @@ -44,18 +44,35 @@ def 
merge_small_entries(labels, values): v2l_df = v2l_df.drop(small_chunk.index) disp.display(v2l_df) - # This part if a bit tricky - # We could have already had a non-zero other, and it could be small or large - if "Other" not in v2l_df.index: - # zero other will end up with misc_count - if misc_count.vals > 0: + # we need to let the sensed be "OTHER" and labeled or inferred be "Other" + # going to use capitalization as a flag + if v2l_df.index[0][-1].isupper(): + print("Found uppercase last letter") + # This part is a bit tricky + # We could have already had a non-zero other, and it could be small or large + if "OTHER" not in v2l_df.index: + # zero other will end up with misc_count + if misc_count.vals > 0: + v2l_df.loc["OTHER"] = misc_count + elif "OTHER" in small_chunk.index: + # non-zero small other will already be in misc_count + v2l_df.loc["OTHER"] = misc_count + else: + # non-zero large other, will not already be in misc_count + v2l_df.loc["OTHER"] = v2l_df.loc["OTHER"] + misc_count + else: #assuming labeled or inferred + # This part is a bit tricky + # We could have already had a non-zero other, and it could be small or large + if "Other" not in v2l_df.index: + # zero other will end up with misc_count + if misc_count.vals > 0: + v2l_df.loc["Other"] = misc_count + elif "Other" in small_chunk.index: + # non-zero small other will already be in misc_count v2l_df.loc["Other"] = misc_count - elif "Other" in small_chunk.index: - # non-zero small other will already be in misc_count - v2l_df.loc["Other"] = misc_count - else: - # non-zero large other, will not already be in misc_count - v2l_df.loc["Other"] = v2l_df.loc["Other"] + misc_count + else: + # non-zero large other, will not already be in misc_count + v2l_df.loc["Other"] = v2l_df.loc["Other"] + misc_count disp.display(v2l_df) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index f45594f..7eec6fd 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -198,7 +198,7 @@ def 
translate_labels(labels): # Input: dynamic_labels, dic_re, and dic_pur # Output: Dictionary mapping between color with mode/purpose/sensed def mapping_color_labels(dynamic_labels, dic_re, dic_pur): - sensed_values = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN", "OTHER", "Other"] + sensed_values = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN", "OTHER", "INVALID"] if len(dynamic_labels) > 0: mode_values = list(mapping_labels(dynamic_labels, "MODE").values()) if "MODE" in dynamic_labels else [] replaced_mode_values = list(mapping_labels(dynamic_labels, "REPLACED_MODE").values()) if "REPLACED_MODE" in dynamic_labels else [] @@ -241,7 +241,7 @@ def load_viz_notebook_sensor_inference_data(year, month, program, include_test_u #TODO-this is also in the admin dash, can we unify? get_max_mode_from_summary = lambda md: ( - "UNKNOWN" + "INVALID" if not isinstance(md, dict) or "distance" not in md or not isinstance(md["distance"], dict) @@ -252,14 +252,14 @@ def load_viz_notebook_sensor_inference_data(year, month, program, include_test_u # Otherwise, return "INVALID". max(md["distance"], key=md["distance"].get) if len(md["distance"]) > 0 - else "UNKNOWN" + else "INVALID" ) ) if len(expanded_ct) > 0: expanded_ct["primary_mode_non_other"] = participant_ct_df.cleaned_section_summary.apply(get_max_mode_from_summary) expanded_ct.primary_mode_non_other.replace({"ON_FOOT": "WALKING"}, inplace=True) - valid_sensed_modes = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN"] + valid_sensed_modes = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN", "INVALID"] expanded_ct["primary_mode"] = expanded_ct.primary_mode_non_other.apply(lambda pm: "OTHER" if pm not in valid_sensed_modes else pm) # Change meters to miles