diff --git a/viz_scripts/auxiliary_files/mode_labels.csv b/viz_scripts/auxiliary_files/mode_labels.csv index 9397d75..551d4ee 100644 --- a/viz_scripts/auxiliary_files/mode_labels.csv +++ b/viz_scripts/auxiliary_files/mode_labels.csv @@ -30,3 +30,4 @@ the_friend who drives us to work was running errands after the shift before drop not_a_trip,not_a_trip,Not a Trip no_travel,,No Travel same_mode,,Same Mode +air,,Airplane diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index 6ced8fc..f93de0d 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -419,7 +419,7 @@ "try:\n", " ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n", " ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n", - " labeled_land_trips_df = expanded_ct[expanded_ct['Mode_confirm'] != \"Airplane\"] if \"Mode_confirm\" in expanded_ct.columns else None\n", + " labeled_land_trips_df = expanded_ct[expanded_ct['mode_confirm'] != \"air\"] if \"mode_confirm\" in expanded_ct.columns else None\n", " sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n", " \n", " sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n", diff --git a/viz_scripts/plots.py b/viz_scripts/plots.py index e7a9742..9879ba8 100644 --- a/viz_scripts/plots.py +++ b/viz_scripts/plots.py @@ -44,18 +44,35 @@ def merge_small_entries(labels, values): v2l_df = v2l_df.drop(small_chunk.index) disp.display(v2l_df) - # This part if a bit tricky - # We could have already had a non-zero other, and it could be small or large - if "Other" not in v2l_df.index: - # zero other will end up with misc_count - if misc_count.vals > 0: + # 
we need to let the sensed be "OTHER" and labeled or inferred be "Other" + # going to use capitalization as a flag + if v2l_df.index[0][-1].isupper(): + print("Found uppercase last letter") + # This part is a bit tricky + # We could have already had a non-zero other, and it could be small or large + if "OTHER" not in v2l_df.index: + # zero other will end up with misc_count + if misc_count.vals > 0: + v2l_df.loc["OTHER"] = misc_count + elif "OTHER" in small_chunk.index: + # non-zero small other will already be in misc_count + v2l_df.loc["OTHER"] = misc_count + else: + # non-zero large other, will not already be in misc_count + v2l_df.loc["OTHER"] = v2l_df.loc["OTHER"] + misc_count + else: #assuming labeled or inferred + # This part is a bit tricky + # We could have already had a non-zero other, and it could be small or large + if "Other" not in v2l_df.index: + # zero other will end up with misc_count + if misc_count.vals > 0: + v2l_df.loc["Other"] = misc_count + elif "Other" in small_chunk.index: + # non-zero small other will already be in misc_count v2l_df.loc["Other"] = misc_count - elif "Other" in small_chunk.index: - # non-zero small other will already be in misc_count - v2l_df.loc["Other"] = misc_count - else: - # non-zero large other, will not already be in misc_count - v2l_df.loc["Other"] = v2l_df.loc["Other"] + misc_count + else: + # non-zero large other, will not already be in misc_count + v2l_df.loc["Other"] = v2l_df.loc["Other"] + misc_count disp.display(v2l_df) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index e2abc57..7eec6fd 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -198,7 +198,7 @@ def translate_labels(labels): # Input: dynamic_labels, dic_re, and dic_pur # Output: Dictionary mapping between color with mode/purpose/sensed def mapping_color_labels(dynamic_labels, dic_re, dic_pur): - sensed_values = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN", "OTHER", "Other"] + sensed_values 
= ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN", "OTHER", "INVALID"] if len(dynamic_labels) > 0: mode_values = list(mapping_labels(dynamic_labels, "MODE").values()) if "MODE" in dynamic_labels else [] replaced_mode_values = list(mapping_labels(dynamic_labels, "REPLACED_MODE").values()) if "REPLACED_MODE" in dynamic_labels else [] @@ -238,10 +238,28 @@ def load_viz_notebook_sensor_inference_data(year, month, program, include_test_u participant_ct_df = load_all_participant_trips(program, tq, include_test_users) expanded_ct = participant_ct_df print(f"Loaded expanded_ct with length {len(expanded_ct)} for {tq}") + + #TODO-this is also in the admin dash, can we unify? + get_max_mode_from_summary = lambda md: ( + "INVALID" + if not isinstance(md, dict) + or "distance" not in md + or not isinstance(md["distance"], dict) + # If 'md' is a dictionary and 'distance' is a valid key pointing to a dictionary: + else ( + # Return the key of 'md["distance"]' whose value is largest, using 'md["distance"].get' as the key function for 'max'. + # This operation only happens if the length of 'md["distance"]' is greater than 0. + # Otherwise, return "INVALID". + max(md["distance"], key=md["distance"].get) + if len(md["distance"]) > 0 + else "INVALID" + ) + ) + if len(expanded_ct) > 0: - expanded_ct["primary_mode_non_other"] = participant_ct_df.cleaned_section_summary.apply(lambda md: max(md["distance"], key=md["distance"].get)) + expanded_ct["primary_mode_non_other"] = participant_ct_df.cleaned_section_summary.apply(get_max_mode_from_summary) expanded_ct.primary_mode_non_other.replace({"ON_FOOT": "WALKING"}, inplace=True) - valid_sensed_modes = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN"] + valid_sensed_modes = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN", "INVALID"] expanded_ct["primary_mode"] = expanded_ct.primary_mode_non_other.apply(lambda pm: "OTHER" if pm not in valid_sensed_modes else pm) # Change meters to miles