diff --git a/viz_scripts/bin/generate_plots.py b/viz_scripts/bin/generate_plots.py index 38d66c6..65815b2 100644 --- a/viz_scripts/bin/generate_plots.py +++ b/viz_scripts/bin/generate_plots.py @@ -7,7 +7,8 @@ import json import os import sys - +import asyncio +import emcommon.util as emcu # Configuration settings to use for all generated plots by this instance # This could also be specified as a parser argument, if we want to generate plots for all programs from one instance @@ -43,7 +44,11 @@ # dynamic_labels can be referenced from # https://github.com/e-mission/nrel-openpath-deploy-configs/blob/main/label_options/example-study-label-options.json -dynamic_labels = { } +labels = { } + +async def load_default_label_options(): + labels = await emcu.read_json_resource("label-options.default.json") + return labels # Check if the dynamic config contains dynamic labels 'label_options' # Parse through the dynamic_labels_url: @@ -54,10 +59,16 @@ if req.status_code != 200: print(f"Unable to download dynamic_labels_url, status code: {req.status_code} for {STUDY_CONFIG}") else: - dynamic_labels = json.loads(req.text) + labels = json.loads(req.text) print(f"Dynamic labels download was successful for nrel-openpath-deploy-configs: {STUDY_CONFIG}" ) else: - print(f"label_options is unavailable for the dynamic_config in {STUDY_CONFIG}") + # load default labels from e-mission-common + # https://raw.githubusercontent.com/JGreenlee/e-mission-common/refs/heads/master/src/emcommon/resources/label-options.default.json + labels = asyncio.run(load_default_label_options()) + if not labels: + print(f"Unable to load labels for : {STUDY_CONFIG}") + else: + print(f"Labels loading was successful for nrel-openpath-deploy-configs: {STUDY_CONFIG}") if args.date is None: start_date = arrow.get(int(dynamic_config['intro']['start_year']), @@ -88,7 +99,7 @@ def compute_for_date(month, year): study_type=dynamic_config['intro']['program_or_study'], mode_of_interest=mode_studied, include_test_users=dynamic_config.get('metrics', {}).get('include_test_users', False), - dynamic_labels = dynamic_labels, + labels = labels, use_imperial = dynamic_config.get('display_config', {}).get('use_imperial', True), sensed_algo_prefix=dynamic_config.get('metrics', {}).get('sensed_algo_prefix', "cleaned"), bluetooth_only = dynamic_config.get('tracking', {}).get('bluetooth_only', False), diff --git a/viz_scripts/docker/load_mongodump.sh b/viz_scripts/docker/load_mongodump.sh index 64fc71a..dc5da1b 100644 --- a/viz_scripts/docker/load_mongodump.sh +++ b/viz_scripts/docker/load_mongodump.sh @@ -53,6 +53,10 @@ echo "Database Name: $DB_NAME" DB_HOST="mongodb://db/$DB_NAME" sed -i.bak "s|DB_HOST=.*|DB_HOST=$DB_HOST|" "$CONFIG_FILE" +# Update the docker-compose configuration file with the actual STUDY_CONFIG +STUDY_CONFIG=$(echo "$DB_NAME" | sed -E 's/openpath_prod_(.*)$/\1/' | tr '_' '-') +sed -i.bak "s|STUDY_CONFIG=.*|STUDY_CONFIG=$STUDY_CONFIG|" "$CONFIG_FILE" + echo "Updated docker-compose file:" cat "$CONFIG_FILE" diff --git a/viz_scripts/energy_calculations.ipynb b/viz_scripts/energy_calculations.ipynb index 79eccf1..569e1c5 100644 --- a/viz_scripts/energy_calculations.ipynb +++ b/viz_scripts/energy_calculations.ipynb @@ -32,7 +32,7 @@ "mode_of_interest = \"e-bike\"\n", "include_test_users = False\n", "is_debug_mode = False\n", - "dynamic_labels = {}\n", + "labels = {}\n", "use_imperial = True" ] }, @@ -91,7 +91,7 @@ " month,\n", " program,\n", " study_type,\n", - " dynamic_labels,\n", + " labels,\n", " include_test_users=include_test_users,\n", " add_footprint=True)\n", "\n", @@ -224,9 +224,9 @@ " if (is_debug_mode == True):\n", " ebco2_debug = ebco2_compute(data_eb, weight_unit_debug)\n", " if (weight_unit == 'kg'):\n", - " scaffolding.print_CO2_emission_calculations(data_eb, ebco2_debug, ebco2, dynamic_labels)\n", + " scaffolding.print_CO2_emission_calculations(data_eb, ebco2_debug, ebco2, labels)\n", " else:\n", - " scaffolding.print_CO2_emission_calculations(data_eb, ebco2, ebco2_debug, dynamic_labels)\n", + " scaffolding.print_CO2_emission_calculations(data_eb, ebco2, ebco2_debug, labels)\n", "\n", "try:\n", " if use_imperial:\n", diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index b0992f1..2cf6b18 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -30,7 +30,7 @@ "program = \"default\"\n", "study_type = \"study\"\n", "include_test_users = False\n", - "dynamic_labels = {}\n", + "labels = {}\n", "use_imperial = True\n", "sensed_algo_prefix = \"cleaned\"\n", "survey_info = {}" @@ -97,8 +97,8 @@ "metadata": {}, "outputs": [], "source": [ - "colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels(dynamic_labels)\n", - "values_to_translations, value_to_translations_purpose, values_to_translations_replaced = await scaffolding.translate_values_to_labels(dynamic_labels)" + "colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = scaffolding.mapping_color_labels(labels)\n", + "values_to_translations, value_to_translations_purpose, values_to_translations_replaced = scaffolding.translate_values_to_labels(labels)" ] }, { @@ -120,7 +120,7 @@ " month,\n", " program,\n", " study_type,\n", - " dynamic_labels,\n", + " labels,\n", " include_test_users=include_test_users)" ] }, @@ -142,6 +142,7 @@ "expanded_ct_sensed, file_suffix_sensed, quality_text_sensed, debug_df_sensed = await scaffolding.load_viz_notebook_sensor_inference_data(year,\n", " month,\n", " program,\n", + " labels,\n", " include_test_users,\n", " sensed_algo_prefix)" ] @@ -165,7 +166,7 @@ " month,\n", " program,\n", " study_type,\n", - " dynamic_labels,\n", + " labels,\n", " include_test_users=include_test_users)" ] }, diff --git a/viz_scripts/generic_metrics_sensed.ipynb b/viz_scripts/generic_metrics_sensed.ipynb index 9d4191f..a800c84 100644 --- a/viz_scripts/generic_metrics_sensed.ipynb +++ b/viz_scripts/generic_metrics_sensed.ipynb @@ -30,6 +30,7 @@ "program = \"default\"\n", "study_type = \"study\"\n", "mode_of_interest = None\n", + "labels = {}\n", "include_test_users = False\n", "use_imperial = False\n", "sensed_algo_prefix = \"cleaned\"\n", @@ -77,6 +78,7 @@ "expanded_ct, file_suffix, quality_text, debug_df = await scaffolding.load_viz_notebook_sensor_inference_data(year,\n", " month,\n", " program,\n", + " labels,\n", " include_test_users,\n", " sensed_algo_prefix)" ] @@ -96,7 +98,7 @@ " expanded_ct[\"primary_mode\"] = expanded_ct.ble_sensed_summary.apply(lambda md: max(md[\"distance\"], key=md[\"distance\"].get))\n", " unique_keys = expanded_ct.groupby(\"primary_mode\").agg({distance_col: \"count\"}).index\n", " print(unique_keys)\n", - " colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels(unique_keys) #Extract ble color mapping\n", + " colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = scaffolding.mapping_color_labels(unique_keys) #Extract ble color mapping\n", " colors_sensed = colors_ble\n", "except ValueError as e:\n", " print(\"Got ValueError \", e)" diff --git a/viz_scripts/generic_timeseries.ipynb b/viz_scripts/generic_timeseries.ipynb index 54ab23a..40d49d9 100644 --- a/viz_scripts/generic_timeseries.ipynb +++ b/viz_scripts/generic_timeseries.ipynb @@ -28,7 +28,7 @@ "study_type = \"study\"\n", "mode_of_interest = None\n", "include_test_users = False\n", - "dynamic_labels = {}\n", + "labels = {}\n", "use_imperial = False\n", "sensed_algo_prefix = \"cleaned\"" ] @@ -84,7 +84,7 @@ " month,\n", " program,\n", " study_type,\n", - " dynamic_labels,\n", + " labels,\n", " include_test_users=include_test_users,\n", " add_footprint=True)\n", "expanded_ct = scaffolding.unpack_energy_emissions(expanded_ct) if \"mode_confirm_footprint\" in expanded_ct.columns else expanded_ct\n", @@ -92,6 +92,7 @@ "expanded_ct_sensed, file_suffix_sensed, quality_text_sensed, debug_df_sensed = await scaffolding.load_viz_notebook_sensor_inference_data(year,\n", " month,\n", " program,\n", + " labels,\n", " include_test_users,\n", " sensed_algo_prefix)" ] @@ -143,10 +144,6 @@ " expanded_ct_sensed.user_id = pd.Categorical(expanded_ct_sensed.user_id)\n", " expanded_ct_sensed.date_time = pd.Categorical(expanded_ct_sensed.date_time)\n", "\n", - "if len(dynamic_labels) > 0:\n", - " labels=dynamic_labels\n", - "else:\n", - " labels = await emcu.read_json_resource(\"label-options.default.json\")\n", "dic_mode_mapping = scaffolding.mapping_labels(labels, \"MODE\")\n", "\n", "if \"Mode_confirm\" in expanded_ct.columns:\n", diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb index a165630..0753a60 100644 --- a/viz_scripts/mode_specific_metrics.ipynb +++ b/viz_scripts/mode_specific_metrics.ipynb @@ -31,7 +31,7 @@ "study_type = \"program\"\n", "mode_of_interest = \"e-bike\"\n", "include_test_users = False\n", - "dynamic_labels = { }\n", + "labels = { }\n", "use_imperial = True" ] }, @@ -95,8 +95,8 @@ "metadata": {}, "outputs": [], "source": [ - "colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels(dynamic_labels)\n", - "values_to_translations, value_to_translations_purpose, value_to_translations_replaced = await scaffolding.translate_values_to_labels(dynamic_labels)" + "colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = scaffolding.mapping_color_labels(labels)\n", + "values_to_translations, value_to_translations_purpose, value_to_translations_replaced = scaffolding.translate_values_to_labels(labels)" ] }, { @@ -118,7 +118,7 @@ " month,\n", " program,\n", " study_type,\n", - " dynamic_labels,\n", + " labels,\n", " include_test_users=include_test_users)" ] }, @@ -141,7 +141,7 @@ " month,\n", " program,\n", " study_type,\n", - " dynamic_labels,\n", + " labels,\n", " include_test_users=include_test_users)" ] }, diff --git a/viz_scripts/mode_specific_timeseries.ipynb b/viz_scripts/mode_specific_timeseries.ipynb index 2342951..69af5ea 100644 --- a/viz_scripts/mode_specific_timeseries.ipynb +++ b/viz_scripts/mode_specific_timeseries.ipynb @@ -28,7 +28,7 @@ "study_type = \"program\"\n", "mode_of_interest = \"e-bike\"\n", "include_test_users = False\n", - "dynamic_labels = { }\n", + "labels = { }\n", "use_imperial = False" ] }, @@ -94,7 +94,7 @@ " month,\n", " program,\n", " study_type,\n", - " dynamic_labels,\n", + " labels,\n", " include_test_users=include_test_users)" ] }, @@ -177,10 +177,6 @@ " mode_distance_interest = mode_distance[mode_distance['mode_confirm']==mode_of_interest].copy()\n", "\n", " # Mapping new mode labels with dictionaries\n", - " if len(dynamic_labels) > 0:\n", - " labels=dynamic_labels\n", - " else:\n", - " labels = await emcu.read_json_resource(\"label-options.default.json\")\n", " dic_mode_mapping = scaffolding.mapping_labels(labels, \"MODE\")\n", " mode_counts['Mode_confirm'] = mode_counts['mode_confirm'].map(dic_mode_mapping)\n", " mode_counts_interest['Mode_confirm'] = mode_counts_interest['mode_confirm'].map(dic_mode_mapping)\n", diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index 3e46eba..7d19424 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -54,9 +54,8 @@ def get_participant_uuids(program, load_test_users): disp.display(participant_list.user_email) return participant_uuid_str -async def add_base_mode_footprint(trip_list): +async def add_base_mode_footprint(trip_list, labels): #TODO filter ahead of this so only labeled trips get a footprint OR display uncertainties - labels = await emcu.read_json_resource("label-options.default.json") value_to_basemode = {mode["value"]: mode.get("base_mode", mode.get("baseMode", "UNKNOWN")) for mode in labels["MODE"]} for trip in trip_list: @@ -84,11 +83,11 @@ async def add_base_mode_footprint(trip_list): return trip_list -async def load_all_confirmed_trips(tq, add_footprint): +async def load_all_confirmed_trips(tq, labels, add_footprint): agg = esta.TimeSeries.get_aggregate_time_series() result_it = agg.find_entries(["analysis/confirmed_trip"], tq) if add_footprint: - processed_list = await add_base_mode_footprint(list(result_it)) + processed_list = await add_base_mode_footprint(list(result_it), labels) all_ct = agg.to_data_df("analysis/confirmed_trip", processed_list) else: all_ct = agg.to_data_df("analysis/confirmed_trip", result_it) @@ -96,9 +95,9 @@ async def load_all_confirmed_trips(tq, add_footprint): disp.display(all_ct.head()) return all_ct -async def load_all_participant_trips(program, tq, load_test_users, add_footprint=False): +async def load_all_participant_trips(program, tq, load_test_users, labels, add_footprint=False): participant_list = get_participant_uuids(program, load_test_users) - all_ct = await load_all_confirmed_trips(tq, add_footprint) + all_ct = await load_all_confirmed_trips(tq, labels, add_footprint) # CASE 1 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 if len(all_ct) == 0: return all_ct @@ -177,7 +176,7 @@ def _select_max_label(row): unique_users = lambda df: len(df.user_id.unique()) if "user_id" in df.columns else 0 trip_label_count = lambda s, df: len(df[s].dropna()) if s in df.columns else 0 -async def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, include_test_users=False, add_footprint=False): +async def load_viz_notebook_data(year, month, program, study_type, labels, include_test_users=False, add_footprint=False): #TODO - see how slow the loading the footprint is compared to just the baseMode, and evaluate if passing param around is needed """ Inputs: year/month/program/study_type = parameters from the visualization notebook @@ -187,11 +186,11 @@ async def load_viz_notebook_data(year, month, program, study_type, dynamic_label """ # Access database tq = get_time_query(year, month) - participant_ct_df = await load_all_participant_trips(program, tq, include_test_users, add_footprint) + participant_ct_df = await load_all_participant_trips(program, tq, include_test_users, labels, add_footprint) labeled_ct = filter_labeled_trips(participant_ct_df) expanded_ct = expand_userinputs(labeled_ct) expanded_ct = data_quality_check(expanded_ct) - expanded_ct = await map_trip_data(expanded_ct, study_type, dynamic_labels) + expanded_ct = map_trip_data(expanded_ct, study_type, labels) # Document data quality file_suffix = get_file_suffix(year, month, program) @@ -211,19 +210,12 @@ async def load_viz_notebook_data(year, month, program, study_type, dynamic_label return expanded_ct, file_suffix, quality_text, debug_df -async def map_trip_data(expanded_trip_df, study_type, dynamic_labels): +def map_trip_data(expanded_trip_df, study_type, labels): # Change meters to miles # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 if "distance" in expanded_trip_df.columns: unit_conversions(expanded_trip_df) - # Select the labels from dynamic_labels is available, - # else get it from emcommon/resources/label-options.default.json - if (len(dynamic_labels)): - labels = dynamic_labels - else: - labels = await emcu.read_json_resource("label-options.default.json") - # Map new mode labels with translations dictionary from dynamic_labels # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 if "mode_confirm" in expanded_trip_df.columns: @@ -254,7 +246,7 @@ async def map_trip_data(expanded_trip_df, study_type, dynamic_labels): return expanded_trip_df -async def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, include_test_users=False): +async def load_viz_notebook_inferred_data(year, month, program, study_type, labels, include_test_users=False): """ Inputs: year/month/program/study_type = parameters from the visualization notebook dic_* = label mappings; if dic_pur is included it will be used to recode trip purpose @@ -263,10 +255,10 @@ async def load_viz_notebook_inferred_data(year, month, program, study_type, dyna """ # Access database tq = get_time_query(year, month) - participant_ct_df = await load_all_participant_trips(program, tq, include_test_users) + participant_ct_df = await load_all_participant_trips(program, tq, include_test_users, labels) inferred_ct = filter_inferred_trips(participant_ct_df) expanded_it = expand_inferredlabels(inferred_ct) - expanded_it = await map_trip_data(expanded_it, study_type, dynamic_labels) + expanded_it = map_trip_data(expanded_it, study_type, labels) # Document data quality file_suffix = get_file_suffix(year, month, program) @@ -307,15 +299,9 @@ def translate_labels(labels): # Function: Maps "MODE", "PURPOSE", and "REPLACED_MODE" to colors. # Input: dynamic_labels # Output: Dictionary mapping between color with mode/purpose/sensed -async def mapping_color_labels(dynamic_labels = {}, unique_keys = []): - # Load default options from e-mission-common - labels = await emcu.read_json_resource("label-options.default.json") +def mapping_color_labels(labels = {}, unique_keys = []): sensed_values = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN", "OTHER", "INVALID"] - # If dynamic_labels are provided, then we will use the dynamic labels for mapping - if len(dynamic_labels) > 0: - labels = dynamic_labels - # Load base mode values and purpose values mode_values = [mode["value"] for mode in labels["MODE"]] if "MODE" in labels else [] purpose_values = [mode["value"] for mode in labels["PURPOSE"]] if "PURPOSE" in labels else [] @@ -343,13 +329,7 @@ async def mapping_color_labels(dynamic_labels = {}, unique_keys = []): ], adjustment_range=[1,1.8]) return colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble -async def translate_values_to_labels(dynamic_labels, language="en"): - # Load default options from e-mission-common - labels = await emcu.read_json_resource("label-options.default.json") - - # If dynamic_labels are provided, then we will use the dynamic labels for mapping - if len(dynamic_labels) > 0: - labels = dynamic_labels +def translate_values_to_labels(labels, language="en"): # Mapping between values and translations for display on plots (for Mode) values_to_translations_mode = mapping_labels(labels, "MODE") # Mapping between values and translations for display on plots (for Purpose) @@ -373,14 +353,14 @@ def mapping_color_surveys(dic_options): return colors -async def load_viz_notebook_sensor_inference_data(year, month, program, include_test_users=False, sensed_algo_prefix="cleaned"): +async def load_viz_notebook_sensor_inference_data(year, month, program, labels, include_test_users=False, sensed_algo_prefix="cleaned"): """ Inputs: year/month/program = parameters from the visualization notebook Pipeline to load and process the data before use in sensor-based visualization notebooks. """ tq = get_time_query(year, month) - participant_ct_df = await load_all_participant_trips(program, tq, include_test_users, False) + participant_ct_df = await load_all_participant_trips(program, tq, include_test_users, labels, False) expanded_ct = participant_ct_df print(f"Loaded expanded_ct with length {len(expanded_ct)} for {tq}") @@ -427,14 +407,14 @@ async def load_viz_notebook_sensor_inference_data(year, month, program, include_ return expanded_ct, file_suffix, quality_text, debug_df -async def load_viz_notebook_survey_data(year, month, program, include_test_users=False): +async def load_viz_notebook_survey_data(year, month, program, labels, include_test_users=False): """ Inputs: year/month/program/test users = parameters from the visualization notebook Returns: df of all trips taken by participants, df of all trips with user_input """ tq = get_time_query(year, month) - participant_ct_df = await load_all_participant_trips(program, tq, include_test_users, False) + participant_ct_df = await load_all_participant_trips(program, tq, include_test_users, labels, False) labeled_ct = filter_labeled_trips(participant_ct_df) # Document data quality diff --git a/viz_scripts/survey_metrics.ipynb b/viz_scripts/survey_metrics.ipynb index cf724a0..cfa032d 100644 --- a/viz_scripts/survey_metrics.ipynb +++ b/viz_scripts/survey_metrics.ipynb @@ -30,7 +30,7 @@ "program = \"default\"\n", "study_type = \"study\"\n", "include_test_users = False\n", - "dynamic_labels = {}\n", + "labels = {}\n", "use_imperial = True\n", "sensed_algo_prefix = \"cleaned\"\n", "bluetooth_only = True #current proxy for fleet status\n", @@ -61,7 +61,7 @@ "label_units, short_label, label_units_lower, distance_col, weight_unit = scaffolding.get_units(use_imperial)\n", "\n", "# get color mappings\n", - "colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels() #just need sensed" + "colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = scaffolding.mapping_color_labels() #just need sensed" ] }, { @@ -96,6 +96,7 @@ "expanded_ct_sensed, file_suffix_sensed, quality_text_sensed, debug_df_sensed = await scaffolding.load_viz_notebook_sensor_inference_data(year,\n", " month,\n", " program,\n", + " labels,\n", " include_test_users,\n", " sensed_algo_prefix)" ] @@ -114,7 +115,7 @@ " expanded_ct_sensed[\"primary_mode\"] = expanded_ct_sensed.ble_sensed_summary.apply(lambda md: max(md[\"distance\"], key=md[\"distance\"].get))\n", " unique_keys = expanded_ct_sensed.groupby(\"primary_mode\").agg({distance_col: \"count\"}).index\n", " print(unique_keys)\n", - " colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels(unique_keys)\n", + " colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = scaffolding.mapping_color_labels(unique_keys)\n", " colors_sensed = colors_ble\n", "except ValueError as e:\n", " print(\"Got ValueError \", e)" diff --git a/viz_scripts/survey_responses.ipynb b/viz_scripts/survey_responses.ipynb index 53a633c..6b045cc 100644 --- a/viz_scripts/survey_responses.ipynb +++ b/viz_scripts/survey_responses.ipynb @@ -13,7 +13,7 @@ "study_type = \"study\"\n", "mode_of_interest = None\n", "include_test_users = True\n", - "dynamic_labels = {}\n", + "labels = {}\n", "use_imperial = True\n", "survey_info = {}" ]