diff --git a/README.md b/README.md index 355cca4..294eb88 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,9 @@ https://towardsdatascience.com/dockerize-your-dash-app-1e155dd1cea3 ## How to run it: Docker Compose (recommended) -`docker compose -f docker-compose-dash-app.yml build` +`docker compose -f docker-compose-dev.yml build` -`docker compose -f docker-compose-dash-app.yml up` +`docker compose -f docker-compose-dev.yml up` You **must** use this method. Do **not** try to directly by setting up a virtualenv with the `requirements.txt` This uses components of the e-mission-server core, so it must have the e-mission-server modules in the PYTHONPATH @@ -65,12 +65,18 @@ These are all the permissions that you can specify: ### Data Page - `data_uuids`: User can view the UUIDs data in the Data page. - `data_trips`: User can view the trips data in the Data page. +- `data_demographics`: User can view the demographics data in the Data page. +- `data_trajectories`: User can view the trajectories data in the Data page. - `data_trips_columns_exclude`: It used to specify a list of column names that should be excluded from the trips data that is displayed on the Data page. It includes valid columns from the **Stage_analysis_timeseries** collection. Valid columns are specified in the following sections. - `data_uuids_columns_exclude`: It used to specify a list of column names that should be excluded from the uuids data that is displayed on the Data page. It includes valid columns from the **Stage_uuids** collection. Valid columns are specified in the following sections. +- `data_demographics_columns_exclude`: It used to specify a list of column names that should be excluded from the demographics data +that is displayed on the Data page. +- `data_trajectories_columns_exclude`: It used to specify a list of column names that should be excluded from the trajectories data +that is displayed on the Data page. 
### Token Page - `token_generate`: User can generate new tokens in the Token page. diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index a3abd9d..f446833 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -10,7 +10,7 @@ For more details on building multi-page Dash applications, check out the Dash documentation: https://dash.plot.ly/urls """ import os -from datetime import date +import arrow import dash import dash_bootstrap_components as dbc @@ -23,7 +23,8 @@ if os.getenv('DASH_DEBUG_MODE', 'True').lower() == 'true': logging.basicConfig(level=logging.DEBUG) -from utils.db_utils import query_uuids, query_confirmed_trips +from utils.datetime_utils import iso_to_date_only +from utils.db_utils import df_to_filtered_records, query_uuids, query_confirmed_trips, query_demographics from utils.permissions import has_permission import flask_talisman as flt @@ -133,61 +134,161 @@ className="sidebar", ) +# Global controls including date picker and timezone selector +def make_controls(): + # according to docs, DatePickerRange will accept YYYY-MM-DD format + today_date = arrow.now().format('YYYY-MM-DD') + last_week_date = arrow.now().shift(days=-7).format('YYYY-MM-DD') + tomorrow_date = arrow.now().shift(days=1).format('YYYY-MM-DD') + return html.Div([ + html.Div([ + # Global Date Picker + dcc.DatePickerRange( + id='date-picker', + display_format='D MMM Y', + start_date=last_week_date, + end_date=today_date, + min_date_allowed='2010-1-1', + max_date_allowed=tomorrow_date, + initial_visible_month=today_date, + ), + dbc.Button( + html.I(className="fas fa-bars", id='collapse-icon'), + outline=True, + id="collapse-button", + n_clicks=0, + style={'color': '#444', 'border': '1px solid #dbdbdb', + 'border-radius': '3px', 'margin-left': '3px'} + ), + ], + style={'display': 'flex'}, + ), + dbc.Collapse([ + html.Div([ + html.Span('Query trips using: ', style={'margin-right': '10px'}), + dcc.Dropdown( + id='date-picker-timezone', + options=[ + 
{'label': 'UTC Time', 'value': 'utc'}, + {'label': 'My Local Timezone', 'value': 'local'}, + # {'label': 'Local Timezone of Trips', 'value': 'trips'}, + ], + value='utc', + clearable=False, + searchable=False, + style={'width': '180px'}, + )] + ), -content = html.Div([ - # Global Date Picker - html.Div( - dcc.DatePickerRange( - id='date-picker', - display_format='D/M/Y', - start_date_placeholder_text='D/M/Y', - end_date_placeholder_text='D/M/Y', - min_date_allowed=date(2010, 1, 1), - max_date_allowed=date.today(), - initial_visible_month=date.today(), - ), style={'margin': '10px 10px 0 0', 'display': 'flex', 'justify-content': 'right'} - ), - - # Pages Content - dcc.Loading( - type='default', - fullscreen=True, - children=html.Div(dash.page_container, style={ - "margin-left": "5rem", - "margin-right": "2rem", - "padding": "2rem 1rem", - }) - ), -]) + dcc.Checklist( + id='global-filters', + options=[ + {'label': 'Exclude "test" users', + 'value': 'exclude-test-users'}, + ], + value=['exclude-test-users'], + style={'margin-top': '10px'}, + ), + ], + id='collapse-filters', + is_open=False, + style={'padding': '5px 15px 10px', 'border': '1px solid #dbdbdb', 'border-top': '0'} + ), + ], + style={'margin': '10px 10px 0 auto', + 'width': 'fit-content', + 'display': 'flex', + 'flex-direction': 'column'} + ) + +page_content = dcc.Loading( + type='default', + fullscreen=True, + children=html.Div(dash.page_container, style={ + "margin-left": "5rem", + "margin-right": "2rem", + "padding": "2rem 1rem", + }) +) -home_page = [ +def make_home_page(): return [ sidebar, - content, + html.Div([make_controls(), page_content]) ] -app.layout = html.Div( - [ - dcc.Location(id='url', refresh=False), - dcc.Store(id='store-trips', data={}), - dcc.Store(id='store-uuids', data={}), - html.Div(id='page-content', children=home_page), - ] -) +def make_layout(): return html.Div([ + dcc.Location(id='url', refresh=False), + dcc.Store(id='store-trips', data={}), + dcc.Store(id='store-uuids', 
data={}), + dcc.Store(id='store-excluded-uuids', data={}), # if 'test' users are excluded, a list of their uuids + dcc.Store(id='store-demographics', data={}), + dcc.Store(id='store-trajectories', data={}), + html.Div(id='page-content', children=make_home_page()), +]) +app.layout = make_layout +# make the 'filters' menu collapsible +@app.callback( + Output("collapse-filters", "is_open"), + Output("collapse-icon", "className"), + [Input("collapse-button", "n_clicks")], + [Input("collapse-filters", "is_open")], +) +def toggle_collapse_filters(n, is_open): + if not n: return (is_open, "fas fa-bars") + if is_open: + return (False, "fas fa-bars") + else: + return (True, "fas fa-chevron-up") # Load data stores @app.callback( Output("store-uuids", "data"), + Output("store-excluded-uuids", "data"), + Input('date-picker', 'start_date'), # these are ISO strings + Input('date-picker', 'end_date'), # these are ISO strings + Input('date-picker-timezone', 'value'), + Input('global-filters', 'value'), +) +def update_store_uuids(start_date, end_date, timezone, filters): + (start_date, end_date) = iso_to_date_only(start_date, end_date) + dff = query_uuids(start_date, end_date, timezone) + if dff.empty: + return {"data": [], "length": 0}, {"data": [], "length": 0} + # if 'exclude-testusers' filter is active, + # exclude any rows with user_token containing 'test', and + # output a list of those excluded UUIDs so other callbacks can exclude them too + if 'exclude-test-users' in filters: + excluded_uuids_list = dff[dff['user_token'].str.contains( + 'test')]['user_id'].tolist() + else: + excluded_uuids_list = [] + records = df_to_filtered_records(dff, 'user_id', excluded_uuids_list) + store_uuids = { + "data": records, + "length": len(records), + } + store_excluded_uuids = { + "data": excluded_uuids_list, + "length": len(excluded_uuids_list), + } + return store_uuids, store_excluded_uuids + + +@app.callback( + Output("store-demographics", "data"), Input('date-picker', 'start_date'), 
Input('date-picker', 'end_date'), + Input('date-picker-timezone', 'value'), + Input('store-excluded-uuids', 'data'), ) -def update_store_uuids(start_date, end_date): - start_date_obj = date.fromisoformat(start_date) if start_date else None - end_date_obj = date.fromisoformat(end_date) if end_date else None - dff = query_uuids(start_date_obj, end_date_obj) - records = dff.to_dict("records") +def update_store_demographics(start_date, end_date, timezone, excluded_uuids): + dataframes = query_demographics() + records = {} + for key, df in dataframes.items(): + records[key] = df_to_filtered_records(df, 'user_id', excluded_uuids["data"]) store = { "data": records, "length": len(records), @@ -198,14 +299,15 @@ def update_store_uuids(start_date, end_date): # Note: this triggers twice on load, not great with a slow db @app.callback( Output("store-trips", "data"), - Input('date-picker', 'start_date'), - Input('date-picker', 'end_date'), + Input('date-picker', 'start_date'), # these are ISO strings + Input('date-picker', 'end_date'), # these are ISO strings + Input('date-picker-timezone', 'value'), + Input('store-excluded-uuids', 'data'), ) -def update_store_trips(start_date, end_date): - start_date_obj = date.fromisoformat(start_date) if start_date else None - end_date_obj = date.fromisoformat(end_date) if end_date else None - df = query_confirmed_trips(start_date_obj, end_date_obj) - records = df.to_dict("records") +def update_store_trips(start_date, end_date, timezone, excluded_uuids): + (start_date, end_date) = iso_to_date_only(start_date, end_date) + df = query_confirmed_trips(start_date, end_date, timezone) + records = df_to_filtered_records(df, 'user_id', excluded_uuids["data"]) # logging.debug("returning records %s" % records[0:2]) store = { "data": records, @@ -228,10 +330,10 @@ def display_page(search): return get_cognito_login_page('Unsuccessful authentication, try again.', 'red') if is_authenticated: - return home_page + return make_home_page() return 
get_cognito_login_page() - return home_page + return make_home_page() extra_csp_url = [ "https://raw.githubusercontent.com", diff --git a/docker/Dockerfile b/docker/Dockerfile index c212d90..2c564f3 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM shankari/e-mission-server:gis-based-mode-detection_2023-04-21--54-09 +FROM shankari/e-mission-server:master_2024-02-10--19-38 ENV DASH_DEBUG_MODE True ENV SERVER_PORT 8050 diff --git a/docker/load_mongodump.sh b/docker/load_mongodump.sh new file mode 100755 index 0000000..bd6fcea --- /dev/null +++ b/docker/load_mongodump.sh @@ -0,0 +1,10 @@ +MONGODUMP_FILE=$1 + +echo "Copying file to docker container" +docker cp $MONGODUMP_FILE op-admin-dashboard-db-1:/tmp + +FILE_NAME=`basename $MONGODUMP_FILE` + +echo "Restoring the dump from $FILE_NAME" +docker exec -e MONGODUMP_FILE=$FILE_NAME op-admin-dashboard-db-1 bash -c 'cd /tmp && tar xvf $MONGODUMP_FILE && mongorestore' + diff --git a/pages/data.py b/pages/data.py index 851c4fa..635b74b 100644 --- a/pages/data.py +++ b/pages/data.py @@ -3,16 +3,17 @@ Since the dcc.Location component is not in the layout when navigating to this page, it triggers the callback. The workaround is to check if the input value is None. 
""" -from dash import dcc, html, Input, Output, callback, register_page, dash_table - +from dash import dcc, html, Input, Output, callback, register_page, dash_table, State # Etc import logging import pandas as pd from dash.exceptions import PreventUpdate +from utils import constants from utils import permissions as perm_utils from utils import db_utils - +from utils.db_utils import df_to_filtered_records, query_trajectories +from utils.datetime_utils import iso_to_date_only register_page(__name__, path="/data") intro = """## Data""" @@ -21,9 +22,10 @@ [ dcc.Markdown(intro), dcc.Tabs(id="tabs-datatable", value='tab-uuids-datatable', children=[ - # dcc.Tab(label='Demographics survey', value='tab-demographics-survey-datatable'), dcc.Tab(label='UUIDs', value='tab-uuids-datatable'), dcc.Tab(label='Trips', value='tab-trips-datatable'), + dcc.Tab(label='Demographics', value='tab-demographics-datatable'), + dcc.Tab(label='Trajectories', value='tab-trajectories-datatable'), ]), html.Div(id='tabs-content'), ] @@ -37,14 +39,31 @@ def clean_location_data(df): df['data.end_loc.coordinates'] = df['data.end_loc.coordinates'].apply(lambda x: f'({x[0]}, {x[1]})') return df +def update_store_trajectories(start_date: str, end_date: str, tz: str, excluded_uuids): + global store_trajectories + df = query_trajectories(start_date, end_date, tz) + records = df_to_filtered_records(df, 'user_id', excluded_uuids["data"]) + store = { + "data": records, + "length": len(records), + } + store_trajectories = store + return store + @callback( Output('tabs-content', 'children'), Input('tabs-datatable', 'value'), Input('store-uuids', 'data'), + Input('store-excluded-uuids', 'data'), Input('store-trips', 'data'), + Input('store-demographics', 'data'), + Input('store-trajectories', 'data'), + Input('date-picker', 'start_date'), + Input('date-picker', 'end_date'), + Input('date-picker-timezone', 'value'), ) -def render_content(tab, store_uuids, store_trips): +def render_content(tab, store_uuids, 
store_excluded_uuids, store_trips, store_demographics, store_trajectories, start_date, end_date, timezone): data, columns, has_perm = None, [], False if tab == 'tab-uuids-datatable': data = store_uuids["data"] @@ -58,21 +77,110 @@ def render_content(tab, store_uuids, store_trips): col['label'] for col in perm_utils.get_allowed_named_trip_columns() ) has_perm = perm_utils.has_permission('data_trips') + df = pd.DataFrame(data) + if df.empty or not has_perm: + return None + + df = df.drop(columns=[col for col in df.columns if col not in columns]) + df = clean_location_data(df) + + trips_table = populate_datatable(df,'trips-table') + #Return an HTML Div containing a button (button-clicked) and the populated datatable + return html.Div([ + html.Button( + 'Display columns with raw units', + id='button-clicked', #identifier for the button + n_clicks=0, #initialize number of clicks to 0 + style={'marginLeft':'5px'} + ), + trips_table, #populated trips table component + ]) + + elif tab == 'tab-demographics-datatable': + data = store_demographics["data"] + has_perm = perm_utils.has_permission('data_demographics') + # if only one survey is available, process it without creating a subtab + if len(data) == 1: + # here data is a dictionary + data = list(data.values())[0] + columns = list(data[0].keys()) + # for multiple survey, create subtabs for unique surveys + elif len(data) > 1: + #returns subtab only if has_perm is True + if not has_perm: + return None + return html.Div([ + dcc.Tabs(id='subtabs-demographics', value=list(data.keys())[0], children=[ + dcc.Tab(label= key, value= key) for key in data + ]), + html.Div(id='subtabs-demographics-content') + ]) + elif tab == 'tab-trajectories-datatable': + # Currently store_trajectories data is loaded only when the respective tab is selected + #Here we query for trajectory data once "Trajectories" tab is selected + (start_date, end_date) = iso_to_date_only(start_date, end_date) + if store_trajectories == {}: + store_trajectories = 
update_store_trajectories(start_date, end_date, timezone, store_excluded_uuids) + data = store_trajectories["data"] + if data: + columns = list(data[0].keys()) + columns = perm_utils.get_trajectories_columns(columns) + has_perm = perm_utils.has_permission('data_trajectories') + df = pd.DataFrame(data) if df.empty or not has_perm: return None df = df.drop(columns=[col for col in df.columns if col not in columns]) - df = clean_location_data(df) return populate_datatable(df) +# handle subtabs for demographic table when there are multiple surveys +@callback( + Output('subtabs-demographics-content', 'children'), + Input('subtabs-demographics', 'value'), + Input('store-demographics', 'data'), +) + +def update_sub_tab(tab, store_demographics): + data = store_demographics["data"] + if tab in data: + data = data[tab] + if data: + columns = list(data[0].keys()) + + df = pd.DataFrame(data) + if df.empty: + return None + + df = df.drop(columns=[col for col in df.columns if col not in columns]) + + return populate_datatable(df) + + +@callback( + Output('trips-table', 'hidden_columns'), # Output hidden columns in the trips-table + Output('button-clicked', 'children'), #updates button label + Input('button-clicked', 'n_clicks'), #number of clicks on the button + State('button-clicked', 'children') #State representing the current label of button +) +#Controls visibility of columns in trips table and updates the label of button based on the number of clicks. 
+def update_dropdowns_trips(n_clicks, button_label): + if n_clicks % 2 == 0: + hidden_col = ["data.duration_seconds", "data.distance_meters","data.distance"] + button_label = 'Display columns with raw units' + else: + hidden_col = ["data.duration", "data.distance_miles", "data.distance_km", "data.distance"] + button_label = 'Display columns with humanized units' + #return the list of hidden columns and the updated button label + return hidden_col, button_label + -def populate_datatable(df): +def populate_datatable(df, table_id=''): if not isinstance(df, pd.DataFrame): raise PreventUpdate return dash_table.DataTable( - # id='my-table', + id= table_id, # columns=[{"name": i, "id": i} for i in df.columns], data=df.to_dict('records'), export_format="csv", @@ -88,5 +196,6 @@ def populate_datatable(df): # 'width': '100px', # 'maxWidth': '100px', }, - style_table={'overflowX': 'auto'} + style_table={'overflowX': 'auto'}, + css=[{"selector":".show-hide", "rule":"display:none"}] ) diff --git a/pages/home.py b/pages/home.py index 1b27f5d..3d98dc6 100644 --- a/pages/home.py +++ b/pages/home.py @@ -5,7 +5,6 @@ """ from uuid import UUID - from dash import dcc, html, Input, Output, callback, register_page import dash_bootstrap_components as dbc @@ -19,6 +18,7 @@ import emission.core.get_database as edb from utils.permissions import has_permission +from utils.datetime_utils import iso_to_date_only register_page(__name__, path="/") @@ -176,11 +176,14 @@ def generate_plot_sign_up_trend(store_uuids): @callback( Output('fig-trips-trend', 'figure'), Input('store-trips', 'data'), + Input('date-picker', 'start_date'), # these are ISO strings + Input('date-picker', 'end_date'), # these are ISO strings ) -def generate_plot_trips_trend(store_trips): +def generate_plot_trips_trend(store_trips, start_date, end_date): df = pd.DataFrame(store_trips.get("data")) trend_df = None + (start_date, end_date) = iso_to_date_only(start_date, end_date) if not df.empty and 
has_permission('overview_trips_trend'): trend_df = compute_trips_trend(df, date_col = "trip_start_time_str") - fig = generate_barplot(trend_df, x = 'date', y = 'count', title = "Trips trend") + fig = generate_barplot(trend_df, x = 'date', y = 'count', title = f"Trips trend({start_date} to {end_date})") return fig diff --git a/pages/map.py b/pages/map.py index f668b6c..2f62cc7 100644 --- a/pages/map.py +++ b/pages/map.py @@ -57,19 +57,36 @@ def create_lines_map(trips_group_by_user_id, user_id_list): return fig -def create_heatmap_fig(data): +def get_map_coordinates(trips_group_by_user_mode, user_mode_list): + coordinates = {'lat': [], 'lon': [], 'color':[]} + for user_mode in user_mode_list: + color = trips_group_by_user_mode[user_mode]['color'] + trips = trips_group_by_user_mode[user_mode]['trips'] + + for trip in trips: + coordinates['lon'].append(trip['start_coordinates'][0]) + coordinates['lon'].append(trip['end_coordinates'][0]) + coordinates['lat'].append(trip['start_coordinates'][1]) + coordinates['lat'].append(trip['end_coordinates'][1]) + coordinates['color'].extend([color,color]) + return coordinates + + +def create_heatmap_fig(coordinates): fig = go.Figure() - if len(data.get('lat', [])) > 0: + if len(coordinates.get('lat', [])) > 0: fig.add_trace( go.Densitymapbox( - lon=data['lon'], - lat=data['lat'], + lon=coordinates['lon'], + lat=coordinates['lat'], + name = '', + ) ) fig.update_layout( mapbox_style='open-street-map', - mapbox_center_lon=data['lon'][0], - mapbox_center_lat=data['lat'][0], + mapbox_center_lon=coordinates['lon'][0], + mapbox_center_lat=coordinates['lat'][0], mapbox_zoom=11, margin={"r": 0, "t": 30, "l": 0, "b": 0}, height=650, @@ -77,25 +94,25 @@ def create_heatmap_fig(data): return fig -def create_bubble_fig(data): +def create_bubble_fig(coordinates): fig = go.Figure() - if len(data.get('lon', [])) > 0: + if len(coordinates.get('lon', [])) > 0: fig.add_trace( go.Scattermapbox( - lat=data['lat'], - lon=data['lon'], + 
lat=coordinates['lat'], + lon=coordinates['lon'], mode='markers', marker=go.scattermapbox.Marker( size=9, - color='royalblue', + color=coordinates['color'], ), ) ) fig.update_layout( autosize=True, mapbox_style='open-street-map', - mapbox_center_lon=data['lon'][0], - mapbox_center_lat=data['lat'][0], + mapbox_center_lon=coordinates['lon'][0], + mapbox_center_lat=coordinates['lat'][0], mapbox_zoom=11, mapbox_bearing=0, margin={'r': 0, 't': 30, 'l': 0, 'b': 0}, @@ -111,6 +128,14 @@ def get_trips_group_by_user_id(trips_data): trips_group_by_user_id = trips_df.groupby('user_id') return trips_group_by_user_id +def get_trips_group_by_user_mode(trips_data): + trips_group_by_user_mode = None + trips_df = pd.DataFrame(trips_data['data']) + if not trips_df.empty: + trips_df['data.user_input.mode_confirm'] = trips_df['data.user_input.mode_confirm'].fillna('Unknown') + trips_group_by_user_mode = trips_df.groupby('data.user_input.mode_confirm') + return trips_group_by_user_mode + def create_single_option(value, color): return { 'label': html.Span( @@ -148,6 +173,15 @@ def create_user_emails_options(trips_group_by_user_id): options.append(create_single_option(user_email, color)) return options, user_emails +def create_user_modes_options(trips_group_by_user_mode): + options = list() + user_modes = set() + for user_mode in trips_group_by_user_mode: + color = trips_group_by_user_mode[user_mode]['color'] + user_modes.add(user_mode) + options.append(create_single_option(user_mode, color)) + return options, user_modes + map_type_options = [] if has_permission('map_heatmap'): map_type_options.append({'label': 'Density Heatmap', 'value': 'heatmap'}) @@ -182,7 +216,11 @@ def create_user_emails_options(trips_group_by_user_id): dbc.Col([ html.Label('User Emails'), dcc.Dropdown(id='user-email-dropdown', multi=True), - ], style={'display': 'block' if has_permission('options_emails') else 'none'}) + ], style={'display': 'block' if has_permission('options_emails') else 'none'}), + dbc.Col([ + 
html.Label('Modes'), + dcc.Dropdown(id='user-mode-dropdown', multi=True), + ], style={'display': 'block'}) ]), dbc.Row( @@ -198,7 +236,7 @@ def create_user_emails_options(trips_group_by_user_id): Input('user-id-dropdown', 'value'), ) def update_user_ids_options(trips_data, selected_user_ids): - user_ids_options, user_ids = create_user_ids_options(trips_data['users_data']) + user_ids_options, user_ids = create_user_ids_options(trips_data['users_data_by_user_id']) if selected_user_ids is not None: selected_user_ids = [user_id for user_id in selected_user_ids if user_id in user_ids] return user_ids_options, selected_user_ids @@ -211,31 +249,46 @@ def update_user_ids_options(trips_data, selected_user_ids): Input('user-email-dropdown', 'value'), ) def update_user_emails_options(trips_data, selected_user_emails): - user_emails_options, user_emails = create_user_emails_options(trips_data['users_data']) + user_emails_options, user_emails = create_user_emails_options(trips_data['users_data_by_user_id']) if selected_user_emails is not None: selected_user_emails = [user_email for user_email in selected_user_emails if user_email in user_emails] return user_emails_options, selected_user_emails +@callback( + Output('user-mode-dropdown', 'options'), + Output('user-mode-dropdown', 'value'), + Input('store-trips-map', 'data'), + Input('user-mode-dropdown', 'value'), +) +def update_user_modes_options(trips_data, selected_user_modes): + user_modes_options, user_modes = create_user_modes_options(trips_data['users_data_by_user_mode']) + if selected_user_modes is not None: + selected_user_modes = [mode_confirm for mode_confirm in selected_user_modes if mode_confirm in user_modes] + return user_modes_options, selected_user_modes @callback( Output('trip-map', 'figure'), Input('map-type-dropdown', 'value'), Input('user-id-dropdown', 'value'), Input('user-email-dropdown', 'value'), + Input('user-mode-dropdown', 'value'), State('store-trips-map', 'data'), ) -def update_output(map_type, 
selected_user_ids, selected_user_emails, trips_data): +def update_output(map_type, selected_user_ids, selected_user_emails, selected_user_modes, trips_data): user_ids = set(selected_user_ids) if selected_user_ids is not None else set() + user_modes=set(selected_user_modes) if selected_user_modes is not None else set() + coordinates = get_map_coordinates(trips_data.get('users_data_by_user_mode', {}), user_modes) if selected_user_emails is not None: for user_email in selected_user_emails: user_ids.add(str(ecwu.User.fromEmail(user_email).uuid)) - if map_type == 'lines': - return create_lines_map(trips_data.get('users_data', {}), user_ids) + if selected_user_modes: + return create_lines_map(trips_data.get('users_data_by_user_mode', {}), user_modes) + return create_lines_map(trips_data.get('users_data_by_user_id', {}), user_ids) elif map_type == 'heatmap': - return create_heatmap_fig(trips_data.get('coordinates', {})) + return create_heatmap_fig(coordinates) elif map_type == 'bubble': - return create_bubble_fig(trips_data.get('coordinates', {})) + return create_bubble_fig(coordinates) else: return go.Figure() @@ -244,33 +297,40 @@ def update_output(map_type, selected_user_ids, selected_user_emails, trips_data) Output('user-id-dropdown', 'disabled'), Output('user-email-dropdown', 'disabled'), Input('map-type-dropdown', 'value'), + Input('user-mode-dropdown', 'value'), ) -def control_user_dropdowns(map_type): +def control_user_dropdowns(map_type,selected_user_modes): disabled = True if map_type == 'lines': disabled = False + if selected_user_modes: + disabled = True return disabled, disabled +def process_trips_group(trips_group): + users_data = dict() + #processes a group of trips, assigns color to each group and stores the processed data in a dictionary + if trips_group: + keys = list(trips_group) + n = len(keys) % 360 + k = 359 // (n - 1) if n > 1 else 0 + for ind, key in enumerate(trips_group.groups.keys()): + color = f'hsl({ind * k}, 100%, 50%)' + trips = 
trips_group.get_group(key).to_dict("records") + users_data[key] = {'color': color, 'trips': trips} + return users_data + + @callback( Output('store-trips-map', 'data'), Input('store-trips', 'data'), ) def store_trips_map_data(trips_data): trips_group_by_user_id = get_trips_group_by_user_id(trips_data) - users_data = dict() - coordinates = {'lat': [], 'lon': []} - if trips_group_by_user_id: - user_ids = list(trips_group_by_user_id) - n = len(user_ids) % 360 - k = 359 // (n - 1) if n > 1 else 0 - for ind, user_id in enumerate(trips_group_by_user_id.groups.keys()): - color = f'hsl({ind * k}, 100%, 50%)' - trips = trips_group_by_user_id.get_group(user_id).sort_values('trip_start_time_str').to_dict("records") - users_data[user_id] = {'color': color, 'trips': trips} - for trip in trips: - coordinates['lon'].append(trip['start_coordinates'][0]) - coordinates['lon'].append(trip['end_coordinates'][0]) - coordinates['lat'].append(trip['start_coordinates'][1]) - coordinates['lat'].append(trip['end_coordinates'][1]) - return {'users_data': users_data, 'coordinates': coordinates} + users_data_by_user_id = process_trips_group(trips_group_by_user_id) + + trips_group_by_user_mode = get_trips_group_by_user_mode(trips_data) + users_data_by_user_mode = process_trips_group(trips_group_by_user_mode) + + return {'users_data_by_user_id':users_data_by_user_id, 'users_data_by_user_mode':users_data_by_user_mode} \ No newline at end of file diff --git a/pages/push_notification.py b/pages/push_notification.py index dc03219..57c954f 100644 --- a/pages/push_notification.py +++ b/pages/push_notification.py @@ -190,12 +190,13 @@ def send_push_notification( send_n_clicks, log, query_spec, emails, uuids, log_o logs.append("dry run, skipping actual push") return "\n".join(logs), 0 else: + response = pnu.send_visible_notification_to_users( + uuid_list, + title, + message, + survey_spec, + ) + pnu.display_response(response) + logs.append("Push notification sent successfully") return "\n".join(logs), 0 
- # response = pnu.send_visible_notification_to_users( - # uuid_list, - # title, - # message, - # survey_spec, - # ) - # pnu.display_response(response) return log, 0 diff --git a/requirements.txt b/requirements.txt index 61cf274..18ab846 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,10 +8,11 @@ dash_extensions==0.1.13 #dashboard_setup/nrel_dash_components-0.0.1.tar.gz # for docker-compose pylint==2.17.2 qrcode==7.4.2 -pillow==9.5.0 -requests==2.28.2 +pillow==10.0.1 +requests==2.31.0 python-jose==3.3.0 flask==2.2.5 flask-talisman==1.0.0 dash_auth==2.0.0 +arrow==1.3.0 dash-leaflet==1.0.7 diff --git a/utils/constants.py b/utils/constants.py index 9eed848..3d53363 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -17,7 +17,11 @@ "data.end_local_dt", "data.end_fmt_time", "data.duration", + "data.duration_seconds", "data.distance", + "data.distance_km", + "data.distance_miles", + "data.distance_meters", "data.start_loc.coordinates", "data.end_loc.coordinates", "user_id" @@ -44,3 +48,47 @@ 'os_version', 'phone_lang', ] + +BINARY_DEMOGRAPHICS_COLS = [ + 'user_id', + '_id', +] + +EXCLUDED_DEMOGRAPHICS_COLS = [ + 'data.xmlResponse', + 'data.name', + 'data.version', + 'data.label', + 'xmlns:jr', + 'xmlns:orx', + 'id', + 'start', + 'end', + 'attrxmlns:jr', + 'attrxmlns:orx', + 'attrid', + '__version__', + 'attrversion', + 'instanceID', +] + +EXCLUDED_TRAJECTORIES_COLS = [ + 'data.loc.type', + 'data.loc.coordinates', + 'data.local_dt.year', + 'data.local_dt.month', + 'data.local_dt.day', + 'data.local_dt.hour', + 'data.local_dt.minute', + 'data.local_dt.second', + 'data.local_dt.weekday', + 'data.local_dt.timezone', + 'data.local_dt_year', + 'data.local_dt_month', + 'data.local_dt_day', + 'data.local_dt_hour', + 'data.local_dt_minute', + 'data.local_dt_second', + 'data.local_dt_weekday', + 'data.local_dt_timezone', +] \ No newline at end of file diff --git a/utils/datetime_utils.py b/utils/datetime_utils.py new file mode 100644 index 0000000..b626191 --- 
/dev/null +++ b/utils/datetime_utils.py @@ -0,0 +1,32 @@ +import arrow + +MAX_EPOCH_TIME = 2 ** 31 - 1 + + +def iso_range_to_ts_range(start_date: str, end_date: str, tz: str): + """ + Returns a tuple of (start_ts, end_ts) as epoch timestamps, given start_date and end_date in + ISO format and the timezone mode in which the dates should be resolved to timestamps ('utc' or 'local') + """ + start_ts, end_ts = None, MAX_EPOCH_TIME + if start_date is not None: + if tz == 'utc': + start_ts = arrow.get(start_date).timestamp() + elif tz == 'local': + start_ts = arrow.get(start_date, tzinfo='local').timestamp() + if end_date is not None: + if tz == 'utc': + end_ts = arrow.get(end_date).replace( + hour=23, minute=59, second=59).timestamp() + elif tz == 'local': + end_ts = arrow.get(end_date, tzinfo='local').replace( + hour=23, minute=59, second=59).timestamp() + return (start_ts, end_ts) + + +def iso_to_date_only(*iso_strs: str): + """ + For each ISO date string in the input, returns only the date part in the format 'YYYY-MM-DD' + e.g. 
'2021-01-01T00:00:00.000Z' -> '2021-01-01' + """ + return [iso_str[:10] if iso_str else None for iso_str in iso_strs] diff --git a/utils/db_utils.py b/utils/db_utils.py index 96a359c..28063a5 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -1,8 +1,6 @@ import logging -from datetime import datetime, timezone -from uuid import UUID - import arrow +from uuid import UUID import pandas as pd import pymongo @@ -11,30 +9,47 @@ import emission.storage.timeseries.abstract_timeseries as esta import emission.storage.timeseries.aggregate_timeseries as estag import emission.storage.timeseries.timequery as estt +import emission.core.wrapper.motionactivity as ecwm import emission.storage.timeseries.geoquery as estg import emission.storage.decorations.section_queries as esds import emission.core.wrapper.modeprediction as ecwm from utils import constants from utils import permissions as perm_utils - - -def query_uuids(start_date, end_date): - query = {'update_ts': {'$exists': True}} - if start_date is not None: - start_time = datetime.combine(start_date, datetime.min.time()).astimezone(timezone.utc) - query['update_ts']['$gte'] = start_time - - if end_date is not None: - end_time = datetime.combine(end_date, datetime.max.time()).astimezone(timezone.utc) - query['update_ts']['$lt'] = end_time - - projection = { - '_id': 0, - 'user_id': '$uuid', - 'user_token': '$user_email', - 'update_ts': 1 - } +from utils.datetime_utils import iso_range_to_ts_range + +def df_to_filtered_records(df, col_to_filter=None, vals_to_exclude: list[str] = []): + """ + Returns a dictionary of df records, given a dataframe, a column to filter on, + and a list of values that rows in that column will be excluded if they match + """ + if df.empty: return [] + if col_to_filter and vals_to_exclude: # will only filter if both are not None or [] + df = df[~df[col_to_filter].isin(vals_to_exclude)] + return df.to_dict("records") + +def query_uuids(start_date: str, end_date: str, tz: str): + # As of now, 
time filtering does not apply to UUIDs; we just query all of them. + # Vestigial code commented out and left below for future reference + + # logging.debug("Querying the UUID DB for %s -> %s" % (start_date,end_date)) + # query = {'update_ts': {'$exists': True}} + # if start_date is not None: + # # have arrow create a datetime using start_date and time 00:00:00 in UTC + # start_time = arrow.get(start_date).datetime + # query['update_ts']['$gte'] = start_time + # if end_date is not None: + # # have arrow create a datetime using end_date and time 23:59:59 in UTC + # end_time = arrow.get(end_date).replace(hour=23, minute=59, second=59).datetime + # query['update_ts']['$lt'] = end_time + # projection = { + # '_id': 0, + # 'user_id': '$uuid', + # 'user_token': '$user_email', + # 'update_ts': 1 + # } + + logging.debug("Querying the UUID DB for (no date range)") # This should actually use the profile DB instead of (or in addition to) # the UUID DB so that we can see the app version, os, manufacturer... 
@@ -50,14 +65,8 @@ def query_uuids(start_date, end_date): df.drop(columns=["uuid", "_id"], inplace=True) return df -def query_confirmed_trips(start_date, end_date): - start_ts, end_ts = None, datetime.max.replace(tzinfo=timezone.utc).timestamp() - if start_date is not None: - start_ts = datetime.combine(start_date, datetime.min.time()).timestamp() - - if end_date is not None: - end_ts = datetime.combine(end_date, datetime.max.time()).timestamp() - +def query_confirmed_trips(start_date: str, end_date: str, tz: str): + (start_ts, end_ts) = iso_range_to_ts_range(start_date, end_date, tz) ts = esta.TimeSeries.get_aggregate_time_series() # Note to self, allow end_ts to also be null in the timequery # we can then remove the start_time, end_time logic @@ -86,14 +95,16 @@ def query_confirmed_trips(start_date, end_date): # https://github.com/e-mission/op-admin-dashboard/issues/29#issuecomment-1530105040 # https://github.com/e-mission/op-admin-dashboard/issues/29#issuecomment-1530439811 # so just replacing the distance and duration with the humanized values for now + df['data.distance_meters'] = df['data.distance'] use_imperial = perm_utils.config.get("display_config", {"use_imperial": False}).get("use_imperial", False) # convert to km to humanize - df['data.distance'] = df['data.distance'] / 1000 + df['data.distance_km'] = df['data.distance'] / 1000 # convert km further to miles because this is the US, Liberia or Myanmar # https://en.wikipedia.org/wiki/Mile + df['data.duration_seconds'] = df['data.duration'] if use_imperial: - df['data.distance'] = df['data.distance'] * 0.6213712 + df['data.distance_miles'] = df['data.distance_km'] * 0.6213712 df['data.duration'] = df['data.duration'].apply(lambda d: arrow.utcnow().shift(seconds=d).humanize(only_distance=True)) @@ -102,26 +113,77 @@ def query_confirmed_trips(start_date, end_date): # logging.debug("After filtering, the actual data is %s" % df.head().trip_start_time_str) return df +def query_demographics(): + # Returns 
dictionary of dfs, where each key is a different survey id and each value is the df for that survey + logging.debug("Querying the demographics for (no date range)") + ts = esta.TimeSeries.get_aggregate_time_series() + + entries = ts.find_entries(["manual/demographic_survey"]) + data = list(entries) + + available_key = {} + for entry in data: + survey_key = list(entry['data']['jsonDocResponse'].keys())[0] + if survey_key not in available_key: + available_key[survey_key] = [] + available_key[survey_key].append(entry) + + dataframes = {} + for key, json_object in available_key.items(): + df = pd.json_normalize(json_object) + dataframes[key] = df + + for key, df in dataframes.items(): + if not df.empty: + for col in constants.BINARY_DEMOGRAPHICS_COLS: + if col in df.columns: + df[col] = df[col].apply(str) + columns_to_drop = [col for col in df.columns if col.startswith("metadata")] + df.drop(columns= columns_to_drop, inplace=True) + modified_columns = perm_utils.get_demographic_columns(df.columns) + df.columns = modified_columns + df.columns=[col.rsplit('.',1)[-1] if col.startswith('data.jsonDocResponse.') else col for col in df.columns] + for col in constants.EXCLUDED_DEMOGRAPHICS_COLS: + if col in df.columns: + df.drop(columns= [col], inplace=True) + + return dataframes + +def query_trajectories(start_date: str, end_date: str, tz: str): + + (start_ts, end_ts) = iso_range_to_ts_range(start_date, end_date, tz) + ts = esta.TimeSeries.get_aggregate_time_series() + entries = ts.find_entries( + key_list=["analysis/recreated_location"], + time_query=estt.TimeQuery("data.ts", start_ts, end_ts), + ) + df = pd.json_normalize(list(entries)) + if not df.empty: + for col in df.columns: + if df[col].dtype == 'object': + df[col] = df[col].apply(str) + columns_to_drop = [col for col in df.columns if col.startswith("metadata")] + df.drop(columns= columns_to_drop, inplace=True) + for col in constants.EXCLUDED_TRAJECTORIES_COLS: + if col in df.columns: + df.drop(columns= [col], inplace=True) + 
df['data.mode_str'] = df['data.mode'].apply(lambda x: ecwm.MotionTypes(x).name if x in set(enum.value for enum in ecwm.MotionTypes) else 'UNKNOWN') + return df + def add_user_stats(user_data): for user in user_data: user_uuid = UUID(user['user_id']) - # TODO: Use the time-series functions when the needed functionality is added. - total_trips = edb.get_analysis_timeseries_db().count_documents( - { - 'user_id': user_uuid, - 'metadata.key': 'analysis/confirmed_trip', - } + total_trips = esta.TimeSeries.get_aggregate_time_series().find_entries_count( + key_list=["analysis/confirmed_trip"], + extra_query_list=[{'user_id': user_uuid}] ) user['total_trips'] = total_trips - labeled_trips = edb.get_analysis_timeseries_db().count_documents( - { - 'user_id': user_uuid, - 'metadata.key': 'analysis/confirmed_trip', - 'data.user_input': {'$ne': {}}, - } + labeled_trips = esta.TimeSeries.get_aggregate_time_series().find_entries_count( + key_list=["analysis/confirmed_trip"], + extra_query_list=[{'user_id': user_uuid}, {'data.user_input': {'$ne': {}}}] ) user['labeled_trips'] = labeled_trips diff --git a/utils/permissions.py b/utils/permissions.py index 99ec0d9..a5304a4 100644 --- a/utils/permissions.py +++ b/utils/permissions.py @@ -29,6 +29,12 @@ }) permissions = config.get("admin_dashboard", {}) +# TODO: The current dynamic config does not have the data_demographics_columns_exclude. +# When all the current studies are completed we can remove the below changes. 
+if 'data_demographics_columns_exclude' not in permissions: + permissions['data_demographics_columns_exclude'] = [] +if 'data_trajectories_columns_exclude' not in permissions: + permissions['data_trajectories_columns_exclude'] = [] def has_permission(perm): return False if permissions.get(perm) is False else True @@ -87,6 +93,16 @@ def get_uuids_columns(): columns.discard(column) return columns +def get_demographic_columns(columns): # NOTE(review): unlike get_trajectories_columns below this never copies into a set(); query_demographics passes df.columns (a pandas Index, which has no .discard()), so a non-empty exclude list would raise — confirm and align with get_trajectories_columns + for column in permissions.get("data_demographics_columns_exclude", []): + columns.discard(column) + return columns + +def get_trajectories_columns(columns): + columns = set(columns) + for column in permissions.get("data_trajectories_columns_exclude", []): + columns.discard(column) + return columns def get_token_prefix(): return permissions['token_prefix'] + '_' if permissions.get('token_prefix') else ''