From 5f3eabfe8889eb856b483e10dfc7b5a257abff24 Mon Sep 17 00:00:00 2001 From: achasmita Date: Tue, 22 Aug 2023 20:12:18 -0700 Subject: [PATCH 01/63] Updated README with correct yml file. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7919d68..7c73ce6 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,9 @@ https://towardsdatascience.com/dockerize-your-dash-app-1e155dd1cea3 ## How to run it: Docker Compose (recommended) -`docker compose -f docker-compose-dash-app.yml build` +`docker compose -f docker-compose-dev.yml build` -`docker compose -f docker-compose-dash-app.yml up` +`docker compose -f docker-compose-dev.yml up` You **must** use this method. Do **not** try to directly by setting up a virtualenv with the `requirements.txt` This uses components of the e-mission-server core, so it must have the e-mission-server modules in the PYTHONPATH From bb5a122d31d2e7517abe13166afab0776383b44f Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Wed, 30 Aug 2023 23:22:38 -0700 Subject: [PATCH 02/63] added demographics table to data page and update README --- README.md | 1 + app_sidebar_collapsible.py | 22 ++++++++++- docker-compose-dev.yml | 4 +- docker/load_mongodump.sh | 10 +++++ docker/start.sh | 2 +- pages/data.py | 10 ++++- utils/constants.py | 79 ++++++++++++++++++++++++++++++++++++++ utils/db_utils.py | 27 +++++++++++++ utils/permissions.py | 6 +++ 9 files changed, 156 insertions(+), 5 deletions(-) create mode 100755 docker/load_mongodump.sh diff --git a/README.md b/README.md index 7c73ce6..d54268b 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ These are all the permissions that you can specify: ### Data Page - `data_uuids`: User can view the UUIDs data in the Data page. - `data_trips`: User can view the trips data in the Data page. +- `data_demographics`: User can view the trips data in the Data page. 
- `data_trips_columns_exclude`: It used to specify a list of column names that should be excluded from the trips data that is displayed on the Data page. It includes valid columns from the **Stage_analysis_timeseries** collection. Valid columns are specified in the following sections. diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index 64077db..4a38e6b 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -23,7 +23,7 @@ if os.getenv('DASH_DEBUG_MODE', 'True').lower() == 'true': logging.basicConfig(level=logging.DEBUG) -from utils.db_utils import query_uuids, query_confirmed_trips +from utils.db_utils import query_uuids, query_confirmed_trips, query_demographics from utils.permissions import has_permission import flask_talisman as flt @@ -163,6 +163,7 @@ dcc.Location(id='url', refresh=False), dcc.Store(id='store-trips', data={}), dcc.Store(id='store-uuids', data={}), + dcc.Store(id='store-demographics', data={}), html.Div(id='page-content', children=home_page), ] ) @@ -204,6 +205,25 @@ def update_store_trips(start_date, end_date): return store +@app.callback( + Output("store-demographics", "data"), + Input('date-picker', 'start_date'), + Input('date-picker', 'end_date'), +) +def update_store_demographics(start_date, end_date): + start_date_obj = date.fromisoformat(start_date) if start_date else None + end_date_obj = date.fromisoformat(end_date) if end_date else None + df = query_demographics(start_date_obj, end_date_obj) + + records = df.to_dict("records") + + store = { + "data": records, + "length": len(records), + } + return store + + # Define the callback to display the page content based on the URL path @app.callback( Output('page-content', 'children'), diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 75f8df2..6aa606e 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -12,7 +12,8 @@ services: DASH_DEBUG_MODE: "True" DASH_SILENCE_ROUTES_LOGGING: "False" DASH_SERVER_PORT: 8050 - 
DB_HOST: db + # DB_HOST: db + DB_HOST: "mongodb://db/openpath_prod_durham" WEB_SERVER_HOST: 0.0.0.0 SERVER_BRANCH: master CONFIG_PATH: "https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/configs/" @@ -47,3 +48,4 @@ networks: volumes: mongo-data: + \ No newline at end of file diff --git a/docker/load_mongodump.sh b/docker/load_mongodump.sh new file mode 100755 index 0000000..bd6fcea --- /dev/null +++ b/docker/load_mongodump.sh @@ -0,0 +1,10 @@ +MONGODUMP_FILE=$1 + +echo "Copying file to docker container" +docker cp $MONGODUMP_FILE op-admin-dashboard-db-1:/tmp + +FILE_NAME=`basename $MONGODUMP_FILE` + +echo "Restoring the dump from $FILE_NAME" +docker exec -e MONGODUMP_FILE=$FILE_NAME op-admin-dashboard-db-1 bash -c 'cd /tmp && tar xvf $MONGODUMP_FILE && mongorestore' + diff --git a/docker/start.sh b/docker/start.sh index 05b2f12..fac36b2 100755 --- a/docker/start.sh +++ b/docker/start.sh @@ -7,7 +7,7 @@ if [ -z ${DB_HOST} ] ; then local_host=`hostname -i` sed "s_localhost_${local_host}_" conf/storage/db.conf.sample > conf/storage/db.conf else - sed "s_localhost_${DB_HOST}_" conf/storage/db.conf.sample > conf/storage/db.conf + sed "s-localhost-${DB_HOST}-" conf/storage/db.conf.sample > conf/storage/db.conf fi # run the app diff --git a/pages/data.py b/pages/data.py index 851c4fa..e3b98c0 100644 --- a/pages/data.py +++ b/pages/data.py @@ -21,9 +21,9 @@ [ dcc.Markdown(intro), dcc.Tabs(id="tabs-datatable", value='tab-uuids-datatable', children=[ - # dcc.Tab(label='Demographics survey', value='tab-demographics-survey-datatable'), dcc.Tab(label='UUIDs', value='tab-uuids-datatable'), dcc.Tab(label='Trips', value='tab-trips-datatable'), + dcc.Tab(label='Demographics', value='tab-demographics-datatable' ) ]), html.Div(id='tabs-content'), ] @@ -43,8 +43,9 @@ def clean_location_data(df): Input('tabs-datatable', 'value'), Input('store-uuids', 'data'), Input('store-trips', 'data'), + Input('store-demographics', 'data'), ) -def render_content(tab, 
store_uuids, store_trips): +def render_content(tab, store_uuids, store_trips, store_demographics): data, columns, has_perm = None, [], False if tab == 'tab-uuids-datatable': data = store_uuids["data"] @@ -58,6 +59,11 @@ def render_content(tab, store_uuids, store_trips): col['label'] for col in perm_utils.get_allowed_named_trip_columns() ) has_perm = perm_utils.has_permission('data_trips') + elif tab == 'tab-demographics-datatable': + data = store_demographics["data"] + columns = perm_utils.get_demographics_columns() # TODO + has_perm = perm_utils.has_permission('data_demographics') + df = pd.DataFrame(data) if df.empty or not has_perm: return None diff --git a/utils/constants.py b/utils/constants.py index 9eed848..a1f8254 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -44,3 +44,82 @@ 'os_version', 'phone_lang', ] + +VALID_DEMOGRAPHICS_COLS = [ + 'user_id', + '_id', + 'data.ts', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.What_is_your_race_ethnicity', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.Are_you_a_student', + 'data.local_dt.year', + 'data.local_dt.month', + 'data.local_dt.day', + 'data.local_dt.hour', + 'data.local_dt.minute', + 'data.local_dt.second', + 'data.local_dt.weekday', + 'data.local_dt.timezone', + 'read_ts', + 'metadata.time_zone', + 'metadata.plugin', + 'metadata.write_ts', + 'metadata.platform', + 'metadata.read_ts', + 'metadata.key', + 'metadata.type', + 'metadata.write_local_dt.year', + 'metadata.write_local_dt.month', + 'metadata.write_local_dt.day', + 'metadata.write_local_dt.hour', + 'metadata.write_local_dt.minute', + 'metadata.write_local_dt.second', + 'metadata.write_local_dt.weekday', + 'metadata.write_local_dt.timezone', + 'metadata.write_fmt_time', + 'data.ts', + 'data.xmlResponse', + 'data.label', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.attr.id', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.attr.xmlns:jr', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.attr.xmlns:orx', + 
'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.At_your_primary_job_do_you_ha', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.Which_best_describes_your_prim', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.Do_you_work_full_time_or_part_', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.Do_you_have_the_option_of_work', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.Please_describe_your_primary_job', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.Do_you_have_more_than_one_job', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.What_days_of_the_week_do_you_t', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.How_many_days_do_you_usually_w_001', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.end', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.start', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.Which_one_below_describe_you_b', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.What_is_your_race_ethnicity', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.Are_you_a_student', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.What_is_the_highest_grade_or_d', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.do_you_consider_yourself_to_be', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.What_is_your_gender', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.How_old_are_you', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.Are_you_a_paid_worker', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.Do_you_have_a_driver_license', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.meta.instanceID', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.How_long_you_had_this_conditio', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Including_yourself_how_many_w_001', + 
'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Including_yourself_how_many_p', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Do_you_own_or_rent_your_home', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Please_identify_which_category', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.If_you_were_unable_to_use_your', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Including_yourself_how_many_p_001', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Including_yourself_how_many_w', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.What_is_your_home_type', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.How_many_motor_vehicles_are_ow', + 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Do_you_have_a_condition_or_han', + 'data.fmt_time', + 'data.name', + 'data.version', + +] + +BINARY_DEMOGRAPHICS_COLS = [ + 'user_id', + '_id', +] \ No newline at end of file diff --git a/utils/db_utils.py b/utils/db_utils.py index 1500633..72557b4 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -39,6 +39,8 @@ def query_uuids(start_date, end_date): # that with the profile data entries = edb.get_uuid_db().find() df = pd.json_normalize(list(entries)) + + # TODO: entires are empty here so UUIDS dataframe is not showing anything if not df.empty: df['update_ts'] = pd.to_datetime(df['update_ts']) df['user_id'] = df['uuid'].apply(str) @@ -98,6 +100,31 @@ def query_confirmed_trips(start_date, end_date): # logging.debug("After filtering, the actual data is %s" % df.head().trip_start_time_str) return df +def query_demographics(start_date, end_date): + start_ts, end_ts = None, datetime.max.timestamp() + if start_date is not None: + start_ts = datetime.combine(start_date, datetime.min.time()).timestamp() + + if end_date is not None: + end_ts = datetime.combine(end_date, datetime.max.time()).timestamp() + + ts = esta.TimeSeries.get_aggregate_time_series() + + entries = 
ts.find_entries( + key_list=["manual/demographic_survey"], + ) + + df = pd.json_normalize(list(entries)) + + if not df.empty: + columns = [col for col in perm_utils.get_demographics_columns() if col in df.columns] + df = df[columns] + for col in constants.BINARY_DEMOGRAPHICS_COLS: + if col in df.columns: + df[col] = df[col].apply(str) + df.drop(columns=['data.xmlResponse', 'data.name', 'data.version', 'data.label'], inplace=True) + return df + def add_user_stats(user_data): for user in user_data: diff --git a/utils/permissions.py b/utils/permissions.py index 99ec0d9..e280668 100644 --- a/utils/permissions.py +++ b/utils/permissions.py @@ -88,5 +88,11 @@ def get_uuids_columns(): return columns +def get_demographics_columns(): + columns = set(constants.VALID_DEMOGRAPHICS_COLS) + for column in permissions.get("data_demographics_columns_exclude", []): + columns.discard(column) + return columns + def get_token_prefix(): return permissions['token_prefix'] + '_' if permissions.get('token_prefix') else '' From af130707663bda84582f5b708ee30312a7023960 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 5 Sep 2023 19:39:19 -0700 Subject: [PATCH 03/63] addressing comments from previous pull request --- app_sidebar_collapsible.py | 32 +++++++---------- docker-compose-dev.yml | 3 +- docker/start.sh | 2 +- pages/data.py | 2 +- utils/constants.py | 74 -------------------------------------- utils/db_utils.py | 13 ++----- utils/permissions.py | 6 ---- 7 files changed, 17 insertions(+), 115 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index 4a38e6b..8380564 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -157,13 +157,24 @@ content, ] +def update_store_demographics(): + df = query_demographics() + records = df.to_dict("records") + + store = { + "data": records, + "length": len(records), + } + return store + +demographics_data = update_store_demographics() app.layout = 
html.Div( [ dcc.Location(id='url', refresh=False), dcc.Store(id='store-trips', data={}), dcc.Store(id='store-uuids', data={}), - dcc.Store(id='store-demographics', data={}), + dcc.Store(id='store-demographics', data= demographics_data), html.Div(id='page-content', children=home_page), ] ) @@ -205,25 +216,6 @@ def update_store_trips(start_date, end_date): return store -@app.callback( - Output("store-demographics", "data"), - Input('date-picker', 'start_date'), - Input('date-picker', 'end_date'), -) -def update_store_demographics(start_date, end_date): - start_date_obj = date.fromisoformat(start_date) if start_date else None - end_date_obj = date.fromisoformat(end_date) if end_date else None - df = query_demographics(start_date_obj, end_date_obj) - - records = df.to_dict("records") - - store = { - "data": records, - "length": len(records), - } - return store - - # Define the callback to display the page content based on the URL path @app.callback( Output('page-content', 'children'), diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 6aa606e..cf9efca 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -12,8 +12,7 @@ services: DASH_DEBUG_MODE: "True" DASH_SILENCE_ROUTES_LOGGING: "False" DASH_SERVER_PORT: 8050 - # DB_HOST: db - DB_HOST: "mongodb://db/openpath_prod_durham" + DB_HOST: db WEB_SERVER_HOST: 0.0.0.0 SERVER_BRANCH: master CONFIG_PATH: "https://raw.githubusercontent.com/e-mission/nrel-openpath-deploy-configs/main/configs/" diff --git a/docker/start.sh b/docker/start.sh index fac36b2..05b2f12 100755 --- a/docker/start.sh +++ b/docker/start.sh @@ -7,7 +7,7 @@ if [ -z ${DB_HOST} ] ; then local_host=`hostname -i` sed "s_localhost_${local_host}_" conf/storage/db.conf.sample > conf/storage/db.conf else - sed "s-localhost-${DB_HOST}-" conf/storage/db.conf.sample > conf/storage/db.conf + sed "s_localhost_${DB_HOST}_" conf/storage/db.conf.sample > conf/storage/db.conf fi # run the app diff --git a/pages/data.py b/pages/data.py index 
e3b98c0..0f0be07 100644 --- a/pages/data.py +++ b/pages/data.py @@ -61,7 +61,7 @@ def render_content(tab, store_uuids, store_trips, store_demographics): has_perm = perm_utils.has_permission('data_trips') elif tab == 'tab-demographics-datatable': data = store_demographics["data"] - columns = perm_utils.get_demographics_columns() # TODO + columns = list(data[0].keys()) has_perm = perm_utils.has_permission('data_demographics') df = pd.DataFrame(data) diff --git a/utils/constants.py b/utils/constants.py index a1f8254..56a3425 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -45,80 +45,6 @@ 'phone_lang', ] -VALID_DEMOGRAPHICS_COLS = [ - 'user_id', - '_id', - 'data.ts', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.What_is_your_race_ethnicity', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.Are_you_a_student', - 'data.local_dt.year', - 'data.local_dt.month', - 'data.local_dt.day', - 'data.local_dt.hour', - 'data.local_dt.minute', - 'data.local_dt.second', - 'data.local_dt.weekday', - 'data.local_dt.timezone', - 'read_ts', - 'metadata.time_zone', - 'metadata.plugin', - 'metadata.write_ts', - 'metadata.platform', - 'metadata.read_ts', - 'metadata.key', - 'metadata.type', - 'metadata.write_local_dt.year', - 'metadata.write_local_dt.month', - 'metadata.write_local_dt.day', - 'metadata.write_local_dt.hour', - 'metadata.write_local_dt.minute', - 'metadata.write_local_dt.second', - 'metadata.write_local_dt.weekday', - 'metadata.write_local_dt.timezone', - 'metadata.write_fmt_time', - 'data.ts', - 'data.xmlResponse', - 'data.label', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.attr.id', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.attr.xmlns:jr', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.attr.xmlns:orx', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.At_your_primary_job_do_you_ha', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.Which_best_describes_your_prim', - 
'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.Do_you_work_full_time_or_part_', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.Do_you_have_the_option_of_work', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.Please_describe_your_primary_job', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.Do_you_have_more_than_one_job', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.What_days_of_the_week_do_you_t', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_yk8eb99.How_many_days_do_you_usually_w_001', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.end', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.start', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.Which_one_below_describe_you_b', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.What_is_your_race_ethnicity', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.Are_you_a_student', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.What_is_the_highest_grade_or_d', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.do_you_consider_yourself_to_be', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.What_is_your_gender', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.How_old_are_you', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.Are_you_a_paid_worker', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_hg4zz25.Do_you_have_a_driver_license', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.meta.instanceID', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.How_long_you_had_this_conditio', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Including_yourself_how_many_w_001', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Including_yourself_how_many_p', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Do_you_own_or_rent_your_home', - 
'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Please_identify_which_category', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.If_you_were_unable_to_use_your', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Including_yourself_how_many_p_001', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Including_yourself_how_many_w', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.What_is_your_home_type', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.How_many_motor_vehicles_are_ow', - 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.group_pa5ah98.Do_you_have_a_condition_or_han', - 'data.fmt_time', - 'data.name', - 'data.version', - -] - BINARY_DEMOGRAPHICS_COLS = [ 'user_id', '_id', diff --git a/utils/db_utils.py b/utils/db_utils.py index 72557b4..6551be9 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -100,14 +100,7 @@ def query_confirmed_trips(start_date, end_date): # logging.debug("After filtering, the actual data is %s" % df.head().trip_start_time_str) return df -def query_demographics(start_date, end_date): - start_ts, end_ts = None, datetime.max.timestamp() - if start_date is not None: - start_ts = datetime.combine(start_date, datetime.min.time()).timestamp() - - if end_date is not None: - end_ts = datetime.combine(end_date, datetime.max.time()).timestamp() - +def query_demographics(): ts = esta.TimeSeries.get_aggregate_time_series() entries = ts.find_entries( @@ -115,10 +108,8 @@ def query_demographics(start_date, end_date): ) df = pd.json_normalize(list(entries)) - + if not df.empty: - columns = [col for col in perm_utils.get_demographics_columns() if col in df.columns] - df = df[columns] for col in constants.BINARY_DEMOGRAPHICS_COLS: if col in df.columns: df[col] = df[col].apply(str) diff --git a/utils/permissions.py b/utils/permissions.py index e280668..99ec0d9 100644 --- a/utils/permissions.py +++ b/utils/permissions.py @@ -88,11 +88,5 @@ def get_uuids_columns(): return columns 
-def get_demographics_columns(): - columns = set(constants.VALID_DEMOGRAPHICS_COLS) - for column in permissions.get("data_demographics_columns_exclude", []): - columns.discard(column) - return columns - def get_token_prefix(): return permissions['token_prefix'] + '_' if permissions.get('token_prefix') else '' From b873a02d754cce32e2a35bed242ae4e96d19a337 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Mon, 11 Sep 2023 16:33:56 -0700 Subject: [PATCH 04/63] addressing comments from previous pull request --- README.md | 2 +- docker-compose-dev.yml | 3 +-- pages/data.py | 2 ++ utils/db_utils.py | 8 +------- utils/permissions.py | 4 ++++ 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index d54268b..f2da347 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ These are all the permissions that you can specify: ### Data Page - `data_uuids`: User can view the UUIDs data in the Data page. - `data_trips`: User can view the trips data in the Data page. -- `data_demographics`: User can view the trips data in the Data page. +- `data_demographics`: User can view the demographics data in the Data page. - `data_trips_columns_exclude`: It used to specify a list of column names that should be excluded from the trips data that is displayed on the Data page. It includes valid columns from the **Stage_analysis_timeseries** collection. Valid columns are specified in the following sections. 
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index cf9efca..188d4a7 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -46,5 +46,4 @@ networks: emission: volumes: - mongo-data: - \ No newline at end of file + mongo-data: \ No newline at end of file diff --git a/pages/data.py b/pages/data.py index 0f0be07..3cc10f4 100644 --- a/pages/data.py +++ b/pages/data.py @@ -62,6 +62,8 @@ def render_content(tab, store_uuids, store_trips, store_demographics): elif tab == 'tab-demographics-datatable': data = store_demographics["data"] columns = list(data[0].keys()) + for column in perm_utils.permissions.get("data_demographics_columns_exclude", []): + columns.discard(column) has_perm = perm_utils.has_permission('data_demographics') df = pd.DataFrame(data) diff --git a/utils/db_utils.py b/utils/db_utils.py index 6551be9..3768784 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -39,8 +39,6 @@ def query_uuids(start_date, end_date): # that with the profile data entries = edb.get_uuid_db().find() df = pd.json_normalize(list(entries)) - - # TODO: entires are empty here so UUIDS dataframe is not showing anything if not df.empty: df['update_ts'] = pd.to_datetime(df['update_ts']) df['user_id'] = df['uuid'].apply(str) @@ -103,12 +101,8 @@ def query_confirmed_trips(start_date, end_date): def query_demographics(): ts = esta.TimeSeries.get_aggregate_time_series() - entries = ts.find_entries( - key_list=["manual/demographic_survey"], - ) - + entries = ts.find_entries(["manual/demographic_survey"]) df = pd.json_normalize(list(entries)) - if not df.empty: for col in constants.BINARY_DEMOGRAPHICS_COLS: if col in df.columns: diff --git a/utils/permissions.py b/utils/permissions.py index 99ec0d9..a172f71 100644 --- a/utils/permissions.py +++ b/utils/permissions.py @@ -29,6 +29,10 @@ }) permissions = config.get("admin_dashboard", {}) +# TODO: The current dynamic config does not have the data_demographics_columns_exclude. 
+# When all the current studies are completed we can remove the below changes. +if 'data_demographics_columns_exclude' not in permissions: + permissions['data_demographics_columns_exclude'] = [] def has_permission(perm): return False if permissions.get(perm) is False else True From 11620a0a7336bfef5a93eb711883238490bb9bed Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 12 Sep 2023 10:03:44 -0700 Subject: [PATCH 05/63] Revert docker-compose-dev.yml --- docker-compose-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 188d4a7..75f8df2 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -46,4 +46,4 @@ networks: emission: volumes: - mongo-data: \ No newline at end of file + mongo-data: From 5ea421b3a8f9a7786f81e9a329f0667897fa0c8d Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 12 Sep 2023 10:24:31 -0700 Subject: [PATCH 06/63] Addressing comments from previous pull request --- README.md | 2 ++ pages/data.py | 2 -- utils/db_utils.py | 4 +++- utils/permissions.py | 4 ++++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f2da347..aa5a236 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,8 @@ columns are specified in the following sections. - `data_uuids_columns_exclude`: It used to specify a list of column names that should be excluded from the uuids data that is displayed on the Data page. It includes valid columns from the **Stage_uuids** collection. Valid columns are specified in the following sections. +- `data_demographics_columns_exclude`: It used to specify a list of column names that should be excluded from the demographics data +that is displayed on the Data page. ### Token Page - `token_generate`: User can generate new tokens in the Token page. 
diff --git a/pages/data.py b/pages/data.py index 3cc10f4..0f0be07 100644 --- a/pages/data.py +++ b/pages/data.py @@ -62,8 +62,6 @@ def render_content(tab, store_uuids, store_trips, store_demographics): elif tab == 'tab-demographics-datatable': data = store_demographics["data"] columns = list(data[0].keys()) - for column in perm_utils.permissions.get("data_demographics_columns_exclude", []): - columns.discard(column) has_perm = perm_utils.has_permission('data_demographics') df = pd.DataFrame(data) diff --git a/utils/db_utils.py b/utils/db_utils.py index 3768784..83f5a34 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -107,7 +107,9 @@ def query_demographics(): for col in constants.BINARY_DEMOGRAPHICS_COLS: if col in df.columns: df[col] = df[col].apply(str) - df.drop(columns=['data.xmlResponse', 'data.name', 'data.version', 'data.label'], inplace=True) + df.drop(columns=['data.xmlResponse', 'data.name', 'data.version', 'data.label'], inplace=True) + modified_columns = perm_utils.get_demographic_columns(df.columns) + df.columns = modified_columns return df diff --git a/utils/permissions.py b/utils/permissions.py index a172f71..ffc49df 100644 --- a/utils/permissions.py +++ b/utils/permissions.py @@ -91,6 +91,10 @@ def get_uuids_columns(): columns.discard(column) return columns +def get_demographic_columns(columns): + for column in permissions.get("data_demographics_columns_exclude", []): + columns.discard(column) + return columns def get_token_prefix(): return permissions['token_prefix'] + '_' if permissions.get('token_prefix') else '' From 1039d30f47c21ad1bed2808443a23b9ed57170d2 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 12 Sep 2023 15:58:59 -0700 Subject: [PATCH 07/63] removed metadata columns --- utils/db_utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index 83f5a34..407d30a 100644 --- a/utils/db_utils.py +++ 
b/utils/db_utils.py @@ -106,10 +106,13 @@ def query_demographics(): if not df.empty: for col in constants.BINARY_DEMOGRAPHICS_COLS: if col in df.columns: - df[col] = df[col].apply(str) - df.drop(columns=['data.xmlResponse', 'data.name', 'data.version', 'data.label'], inplace=True) + df[col] = df[col].apply(str) + columns_to_drop = [col for col in df.columns if col.startswith("metadata")] + df.drop(columns= columns_to_drop, inplace=True) + df.drop(columns=['data.xmlResponse', 'data.name', 'data.version', 'data.label', 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.attr.id','data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.attr.xmlns:jr','data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.attr.xmlns:orx'], inplace=True) modified_columns = perm_utils.get_demographic_columns(df.columns) - df.columns = modified_columns + df.columns = modified_columns + df.columns=[col.rsplit('.',1)[-1] if col.startswith('data.jsonDocResponse.') else col for col in df.columns] return df From 37590cfdd2424e0cf9822051317d67fc5cbc02e1 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 12 Sep 2023 16:23:19 -0700 Subject: [PATCH 08/63] Addressing previous comments --- utils/db_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index 407d30a..6c15eda 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -109,10 +109,11 @@ def query_demographics(): df[col] = df[col].apply(str) columns_to_drop = [col for col in df.columns if col.startswith("metadata")] df.drop(columns= columns_to_drop, inplace=True) - df.drop(columns=['data.xmlResponse', 'data.name', 'data.version', 'data.label', 'data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.attr.id','data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.attr.xmlns:jr','data.jsonDocResponse.aSfdnWs9LE6q8YEF7u9n85.attr.xmlns:orx'], inplace=True) + df.drop(columns=['data.xmlResponse', 'data.name', 'data.version', 'data.label'], inplace=True) modified_columns = 
perm_utils.get_demographic_columns(df.columns) df.columns = modified_columns - df.columns=[col.rsplit('.',1)[-1] if col.startswith('data.jsonDocResponse.') else col for col in df.columns] + df.columns=[col.rsplit('.',1)[-1] if col.startswith('data.jsonDocResponse.') else col for col in df.columns] + df.drop(columns=['xmlns:jr', 'xmlns:orx', 'id'], inplace = True) return df From e753848770934a39e1198bd872a7a8ffc74e3498 Mon Sep 17 00:00:00 2001 From: Shankari Date: Tue, 12 Sep 2023 20:46:15 -0700 Subject: [PATCH 09/63] Bump up the base server image as part of the upgrade This adds the new `count_documents` call to the timeseries interface --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index c212d90..8ff8dab 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM shankari/e-mission-server:gis-based-mode-detection_2023-04-21--54-09 +FROM shankari/e-mission-server:gis-based-mode-detection_2023-09-13--30-28 ENV DASH_DEBUG_MODE True ENV SERVER_PORT 8050 From 89c110c60c5f8ae4b10df7b1d26caf6ddf2fd827 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Wed, 13 Sep 2023 19:08:58 -0700 Subject: [PATCH 10/63] Added trajectory table to data page --- README.md | 3 +++ app_sidebar_collapsible.py | 15 ++++++++++++++- pages/data.py | 10 ++++++++-- utils/constants.py | 7 +++++++ utils/db_utils.py | 18 ++++++++++++++++++ utils/permissions.py | 7 +++++++ 6 files changed, 57 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index aa5a236..b2cc9b7 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ These are all the permissions that you can specify: - `data_uuids`: User can view the UUIDs data in the Data page. - `data_trips`: User can view the trips data in the Data page. - `data_demographics`: User can view the demographics data in the Data page. +- `data_trajectories`: User can view the trajectories data in the Data page. 
- `data_trips_columns_exclude`: It used to specify a list of column names that should be excluded from the trips data that is displayed on the Data page. It includes valid columns from the **Stage_analysis_timeseries** collection. Valid columns are specified in the following sections. @@ -74,6 +75,8 @@ that is displayed on the Data page. It includes valid columns from the **Stage_u specified in the following sections. - `data_demographics_columns_exclude`: It used to specify a list of column names that should be excluded from the demographics data that is displayed on the Data page. +- `data_trajectories_columns_exclude`: It used to specify a list of column names that should be excluded from the trajectories data +that is displayed on the Data page. ### Token Page - `token_generate`: User can generate new tokens in the Token page. diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index 8380564..b9f163f 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -23,7 +23,7 @@ if os.getenv('DASH_DEBUG_MODE', 'True').lower() == 'true': logging.basicConfig(level=logging.DEBUG) -from utils.db_utils import query_uuids, query_confirmed_trips, query_demographics +from utils.db_utils import query_uuids, query_confirmed_trips, query_demographics, query_trajectories from utils.permissions import has_permission import flask_talisman as flt @@ -169,12 +169,25 @@ def update_store_demographics(): demographics_data = update_store_demographics() + +def update_store_trajectories(): + df = query_trajectories() + records = df.to_dict("records") + store = { + "data": records, + "length": len(records), + } + return store + +trajectories_data = update_store_trajectories() + app.layout = html.Div( [ dcc.Location(id='url', refresh=False), dcc.Store(id='store-trips', data={}), dcc.Store(id='store-uuids', data={}), dcc.Store(id='store-demographics', data= demographics_data), + dcc.Store(id='store-trajectories', data= trajectories_data), 
html.Div(id='page-content', children=home_page), ] ) diff --git a/pages/data.py b/pages/data.py index 0f0be07..f73be7b 100644 --- a/pages/data.py +++ b/pages/data.py @@ -23,7 +23,8 @@ dcc.Tabs(id="tabs-datatable", value='tab-uuids-datatable', children=[ dcc.Tab(label='UUIDs', value='tab-uuids-datatable'), dcc.Tab(label='Trips', value='tab-trips-datatable'), - dcc.Tab(label='Demographics', value='tab-demographics-datatable' ) + dcc.Tab(label='Demographics', value='tab-demographics-datatable'), + dcc.Tab(label='Trajectories', value='tab-trajectories-datatable'), ]), html.Div(id='tabs-content'), ] @@ -44,8 +45,9 @@ def clean_location_data(df): Input('store-uuids', 'data'), Input('store-trips', 'data'), Input('store-demographics', 'data'), + Input('store-trajectories', 'data'), ) -def render_content(tab, store_uuids, store_trips, store_demographics): +def render_content(tab, store_uuids, store_trips, store_demographics, store_trajectories): data, columns, has_perm = None, [], False if tab == 'tab-uuids-datatable': data = store_uuids["data"] @@ -63,6 +65,10 @@ def render_content(tab, store_uuids, store_trips, store_demographics): data = store_demographics["data"] columns = list(data[0].keys()) has_perm = perm_utils.has_permission('data_demographics') + elif tab == 'tab-trajectories-datatable': + data = store_trajectories["data"] + columns = list(data[0].keys()) + has_perm = perm_utils.has_permission('data_trajectories') df = pd.DataFrame(data) if df.empty or not has_perm: diff --git a/utils/constants.py b/utils/constants.py index 56a3425..50fb172 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -48,4 +48,11 @@ BINARY_DEMOGRAPHICS_COLS = [ 'user_id', '_id', +] + +BINARY_TRAJECTORIES_COLS = [ + 'user_id', + '_id', + 'data.section', + 'data.loc.coordinates' ] \ No newline at end of file diff --git a/utils/db_utils.py b/utils/db_utils.py index 6c15eda..93df58d 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -117,6 +117,24 @@ def query_demographics(): 
return df +def query_trajectories(): + ts = esta.TimeSeries.get_aggregate_time_series() + + entries = ts.find_entries( + key_list=["analysis/recreated_location"], + ) + df = pd.json_normalize(list(entries)) + if not df.empty: + for col in constants.BINARY_TRAJECTORIES_COLS: + if col in df.columns: + df[col] = df[col].apply(str) + columns_to_drop = [col for col in df.columns if col.startswith("metadata")] + df.drop(columns= columns_to_drop, inplace=True) + modified_columns = perm_utils.get_trajectories_columns(df.columns) + df.columns = modified_columns + return df + + def add_user_stats(user_data): for user in user_data: user_uuid = UUID(user['user_id']) diff --git a/utils/permissions.py b/utils/permissions.py index ffc49df..50e3a41 100644 --- a/utils/permissions.py +++ b/utils/permissions.py @@ -33,6 +33,8 @@ # When all the current studies are completed we can remove the below changes. if 'data_demographics_columns_exclude' not in permissions: permissions['data_demographics_columns_exclude'] = [] +if 'data_trajectories_columns_exclude' not in permissions: + permissions['data_trajectories_columns_exclude'] = [] def has_permission(perm): return False if permissions.get(perm) is False else True @@ -96,5 +98,10 @@ def get_demographic_columns(columns): columns.discard(column) return columns +def get_trajectories_columns(columns): + for column in permissions.get("data_trajectories_columns_exclude", []): + columns.discard(column) + return columns + def get_token_prefix(): return permissions['token_prefix'] + '_' if permissions.get('token_prefix') else '' From 295e0a09e0bd6799b705cf68d847d1725642a565 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 19 Sep 2023 20:27:34 -0700 Subject: [PATCH 11/63] changed code to load trajectory table data only when the respective tab is selected --- app_sidebar_collapsible.py | 15 ++------------- pages/data.py | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 14 deletions(-) 
diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index b9f163f..626cf12 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -23,7 +23,7 @@ if os.getenv('DASH_DEBUG_MODE', 'True').lower() == 'true': logging.basicConfig(level=logging.DEBUG) -from utils.db_utils import query_uuids, query_confirmed_trips, query_demographics, query_trajectories +from utils.db_utils import query_uuids, query_confirmed_trips, query_demographics from utils.permissions import has_permission import flask_talisman as flt @@ -170,24 +170,13 @@ def update_store_demographics(): demographics_data = update_store_demographics() -def update_store_trajectories(): - df = query_trajectories() - records = df.to_dict("records") - store = { - "data": records, - "length": len(records), - } - return store - -trajectories_data = update_store_trajectories() - app.layout = html.Div( [ dcc.Location(id='url', refresh=False), dcc.Store(id='store-trips', data={}), dcc.Store(id='store-uuids', data={}), dcc.Store(id='store-demographics', data= demographics_data), - dcc.Store(id='store-trajectories', data= trajectories_data), + dcc.Store(id ='store-trajectories', data = {}), html.Div(id='page-content', children=home_page), ] ) diff --git a/pages/data.py b/pages/data.py index f73be7b..2fc4629 100644 --- a/pages/data.py +++ b/pages/data.py @@ -12,7 +12,7 @@ from utils import permissions as perm_utils from utils import db_utils - +from utils.db_utils import query_trajectories register_page(__name__, path="/data") intro = """## Data""" @@ -38,6 +38,17 @@ def clean_location_data(df): df['data.end_loc.coordinates'] = df['data.end_loc.coordinates'].apply(lambda x: f'({x[0]}, {x[1]})') return df +def update_store_trajectories(): + global store_trajectories + df = query_trajectories() + records = df.to_dict("records") + store = { + "data": records, + "length": len(records), + } + store_trajectories = store + return store + @callback( Output('tabs-content', 'children'), @@ -66,6 
+77,8 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj columns = list(data[0].keys()) has_perm = perm_utils.has_permission('data_demographics') elif tab == 'tab-trajectories-datatable': + if store_trajectories == {}: + store_trajectories = update_store_trajectories() data = store_trajectories["data"] columns = list(data[0].keys()) has_perm = perm_utils.has_permission('data_trajectories') From fb21a00aed3c59b1400dc2feffd41f77cd090bf1 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Wed, 20 Sep 2023 15:40:24 -0700 Subject: [PATCH 12/63] Removing extraneous columns from demographic table (#67) * Removing extraneous columns from demographic table * Removing extraneous columns from demographic table --- utils/constants.py | 18 ++++++++++++++++++ utils/db_utils.py | 15 ++++++++------- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/utils/constants.py b/utils/constants.py index 56a3425..6cc197b 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -48,4 +48,22 @@ BINARY_DEMOGRAPHICS_COLS = [ 'user_id', '_id', +] + +EXCLUDED_DEMOGRAPHICS_COLS = [ + 'data.xmlResponse', + 'data.name', + 'data.version', + 'data.label', + 'xmlns:jr', + 'xmlns:orx', + 'id', + 'start', + 'end', + 'attrxmlns:jr', + 'attrxmlns:orx', + 'attrid', + '__version__', + 'attrversion', + 'instanceID', ] \ No newline at end of file diff --git a/utils/db_utils.py b/utils/db_utils.py index 6c15eda..59413e9 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -107,13 +107,14 @@ def query_demographics(): for col in constants.BINARY_DEMOGRAPHICS_COLS: if col in df.columns: df[col] = df[col].apply(str) - columns_to_drop = [col for col in df.columns if col.startswith("metadata")] - df.drop(columns= columns_to_drop, inplace=True) - df.drop(columns=['data.xmlResponse', 'data.name', 'data.version', 'data.label'], inplace=True) - modified_columns = perm_utils.get_demographic_columns(df.columns) - df.columns = 
modified_columns - df.columns=[col.rsplit('.',1)[-1] if col.startswith('data.jsonDocResponse.') else col for col in df.columns] - df.drop(columns=['xmlns:jr', 'xmlns:orx', 'id'], inplace = True) + columns_to_drop = [col for col in df.columns if col.startswith("metadata")] + df.drop(columns= columns_to_drop, inplace=True) + modified_columns = perm_utils.get_demographic_columns(df.columns) + df.columns = modified_columns + df.columns=[col.rsplit('.',1)[-1] if col.startswith('data.jsonDocResponse.') else col for col in df.columns] + for col in constants.EXCLUDED_DEMOGRAPHICS_COLS: + if col in df.columns: + df.drop(columns= [col], inplace=True) return df From 3388281d67bc4a0f8ff3171bbbc8298caf5330a0 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 26 Sep 2023 15:44:09 -0700 Subject: [PATCH 13/63] Added comment for trajectory table. --- pages/data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pages/data.py b/pages/data.py index 2fc4629..e0deb56 100644 --- a/pages/data.py +++ b/pages/data.py @@ -77,6 +77,8 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj columns = list(data[0].keys()) has_perm = perm_utils.has_permission('data_demographics') elif tab == 'tab-trajectories-datatable': + # Currently store_trajectories data is loaded only when the respective tab is selected + #Here we query for trajectory data once "Trajectories" tab is selected if store_trajectories == {}: store_trajectories = update_store_trajectories() data = store_trajectories["data"] From 4fd9f6265334ae4e76ec720243c9ed900d0421fb Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 26 Sep 2023 17:06:12 -0700 Subject: [PATCH 14/63] Used find_entries_count() function to count database entries. 
--- utils/db_utils.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index 8e7246c..b2c898a 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -140,21 +140,15 @@ def add_user_stats(user_data): for user in user_data: user_uuid = UUID(user['user_id']) - # TODO: Use the time-series functions when the needed functionality is added. - total_trips = edb.get_analysis_timeseries_db().count_documents( - { - 'user_id': user_uuid, - 'metadata.key': 'analysis/confirmed_trip', - } + total_trips = esta.TimeSeries.get_aggregate_time_series().find_entries_count( + key_list=["analysis/confirmed_trip"], + extra_query_list=[{'user_id': user_uuid}] ) user['total_trips'] = total_trips - labeled_trips = edb.get_analysis_timeseries_db().count_documents( - { - 'user_id': user_uuid, - 'metadata.key': 'analysis/confirmed_trip', - 'data.user_input': {'$ne': {}}, - } + labeled_trips = esta.TimeSeries.get_aggregate_time_series().find_entries_count( + key_list=["analysis/confirmed_trip"], + extra_query_list=[{'user_id': user_uuid}, {'data.user_input': {'$ne': {}}}] ) user['labeled_trips'] = labeled_trips From 5afc128e43e3fc2a3c861574eeb101f3bbcc6096 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Fri, 29 Sep 2023 00:20:45 -0700 Subject: [PATCH 15/63] Fixed the issues with columns in trajectory table --- utils/constants.py | 8 -------- utils/db_utils.py | 6 +++--- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/utils/constants.py b/utils/constants.py index 5bc02e9..6cc197b 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -50,14 +50,6 @@ '_id', ] - -BINARY_TRAJECTORIES_COLS = [ - 'user_id', - '_id', - 'data.section', - 'data.loc.coordinates' -] - EXCLUDED_DEMOGRAPHICS_COLS = [ 'data.xmlResponse', 'data.name', diff --git a/utils/db_utils.py b/utils/db_utils.py index b2c898a..4024e73 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@
-126,9 +126,9 @@ def query_trajectories(): ) df = pd.json_normalize(list(entries)) if not df.empty: - for col in constants.BINARY_TRAJECTORIES_COLS: - if col in df.columns: - df[col] = df[col].apply(str) + for col in df.columns: + if df[col].dtype == 'object': + df[col] = df[col].apply(str) columns_to_drop = [col for col in df.columns if col.startswith("metadata")] df.drop(columns= columns_to_drop, inplace=True) modified_columns = perm_utils.get_trajectories_columns(df.columns) From f9d2d3c6172b5d530791d94823c95f442e63ae0d Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Mon, 2 Oct 2023 10:14:05 -0700 Subject: [PATCH 16/63] Fixed trajectory table to filter by date range. --- pages/data.py | 20 +++++++++++++------- utils/db_utils.py | 9 ++++++++- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/pages/data.py b/pages/data.py index e0deb56..e4f220e 100644 --- a/pages/data.py +++ b/pages/data.py @@ -4,7 +4,7 @@ The workaround is to check if the input value is None. 
""" from dash import dcc, html, Input, Output, callback, register_page, dash_table - +from datetime import date # Etc import logging import pandas as pd @@ -38,9 +38,9 @@ def clean_location_data(df): df['data.end_loc.coordinates'] = df['data.end_loc.coordinates'].apply(lambda x: f'({x[0]}, {x[1]})') return df -def update_store_trajectories(): +def update_store_trajectories(start_date_obj,end_date_obj): global store_trajectories - df = query_trajectories() + df = query_trajectories(start_date_obj,end_date_obj) records = df.to_dict("records") store = { "data": records, @@ -57,8 +57,11 @@ def update_store_trajectories(): Input('store-trips', 'data'), Input('store-demographics', 'data'), Input('store-trajectories', 'data'), + Input('date-picker', 'start_date'), + Input('date-picker', 'end_date'), + ) -def render_content(tab, store_uuids, store_trips, store_demographics, store_trajectories): +def render_content(tab, store_uuids, store_trips, store_demographics, store_trajectories, start_date, end_date): data, columns, has_perm = None, [], False if tab == 'tab-uuids-datatable': data = store_uuids["data"] @@ -79,11 +82,14 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj elif tab == 'tab-trajectories-datatable': # Currently store_trajectories data is loaded only when the respective tab is selected #Here we query for trajectory data once "Trajectories" tab is selected + start_date_obj = date.fromisoformat(start_date) if start_date else None + end_date_obj = date.fromisoformat(end_date) if end_date else None if store_trajectories == {}: - store_trajectories = update_store_trajectories() + store_trajectories = update_store_trajectories(start_date_obj,end_date_obj) data = store_trajectories["data"] - columns = list(data[0].keys()) - has_perm = perm_utils.has_permission('data_trajectories') + if data: + columns = list(data[0].keys()) + has_perm = perm_utils.has_permission('data_trajectories') df = pd.DataFrame(data) if df.empty or not 
has_perm: diff --git a/utils/db_utils.py b/utils/db_utils.py index 4024e73..16565ce 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -118,11 +118,18 @@ def query_demographics(): return df -def query_trajectories(): +def query_trajectories(start_date, end_date): + start_ts, end_ts = None, datetime.max.timestamp() + if start_date is not None: + start_ts = datetime.combine(start_date, datetime.min.time()).timestamp() + + if end_date is not None: + end_ts = datetime.combine(end_date, datetime.max.time()).timestamp() ts = esta.TimeSeries.get_aggregate_time_series() entries = ts.find_entries( key_list=["analysis/recreated_location"], + time_query=estt.TimeQuery("data.ts", start_ts, end_ts), ) df = pd.json_normalize(list(entries)) if not df.empty: From 32a883af749567841b237a0e8592651a00ca5ab7 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 3 Oct 2023 10:57:14 -0700 Subject: [PATCH 17/63] Removed extraneous columns and updated data.mode to a string based enum. 
--- utils/constants.py | 21 +++++++++++++++++++++ utils/db_utils.py | 6 ++++++ 2 files changed, 27 insertions(+) diff --git a/utils/constants.py b/utils/constants.py index 6cc197b..2b40c81 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -66,4 +66,25 @@ '__version__', 'attrversion', 'instanceID', +] + +EXCLUDED_TRAJECTORIES_COLS = [ + 'data.loc.type', + 'data.loc.coordinates', + 'data.local_dt.year', + 'data.local_dt.month', + 'data.local_dt.day', + 'data.local_dt.hour', + 'data.local_dt.minute', + 'data.local_dt.second', + 'data.local_dt.weekday', + 'data.local_dt.timezone', + 'data.local_dt_year', + 'data.local_dt_month', + 'data.local_dt_day', + 'data.local_dt_hour', + 'data.local_dt_minute', + 'data.local_dt_second', + 'data.local_dt_weekday', + 'data.local_dt_timezone', ] \ No newline at end of file diff --git a/utils/db_utils.py b/utils/db_utils.py index 16565ce..fec844b 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -10,6 +10,8 @@ import emission.core.get_database as edb import emission.storage.timeseries.abstract_timeseries as esta import emission.storage.timeseries.timequery as estt +import emission.core.wrapper.modeprediction as ecwm + from utils import constants from utils import permissions as perm_utils @@ -140,6 +142,10 @@ def query_trajectories(start_date, end_date): df.drop(columns= columns_to_drop, inplace=True) modified_columns = perm_utils.get_trajectories_columns(df.columns) df.columns = modified_columns + for col in constants.EXCLUDED_TRAJECTORIES_COLS: + if col in df.columns: + df.drop(columns= [col], inplace=True) + df['data.mode'] = df['data.mode'].apply(lambda x: ecwm.PredictedModeTypes(x).name if x in set(enum.value for enum in ecwm.PredictedModeTypes) else 'UNKNOWN') return df From 6aea730ce41496ba52a25d71aa314d6d3740bd3c Mon Sep 17 00:00:00 2001 From: Shankari Date: Thu, 5 Oct 2023 19:52:17 -0700 Subject: [PATCH 18/63] Use `MotionTypes` instead of `PredictedModeType` The recreated locations are created in the "clean 
and resample" stage so will only have MotionTypes (aka cleaned modes) and not the full set of predicted mode types So we cannot use the `PredictedModeType` enums to map the ints to strings Mapping using the correct enum instead... --- utils/db_utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index fec844b..41f3227 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -10,7 +10,7 @@ import emission.core.get_database as edb import emission.storage.timeseries.abstract_timeseries as esta import emission.storage.timeseries.timequery as estt -import emission.core.wrapper.modeprediction as ecwm +import emission.core.wrapper.motionactivity as ecwm from utils import constants @@ -18,6 +18,7 @@ def query_uuids(start_date, end_date): + logging.debug("Querying the UUID DB for %s -> %s" % (start_date,end_date)) query = {'update_ts': {'$exists': True}} if start_date is not None: start_time = datetime.combine(start_date, datetime.min.time()).astimezone(timezone.utc) @@ -101,6 +102,7 @@ def query_confirmed_trips(start_date, end_date): return df def query_demographics(): + logging.debug("Querying the demographics for (no date range)") ts = esta.TimeSeries.get_aggregate_time_series() entries = ts.find_entries(["manual/demographic_survey"]) @@ -145,7 +147,7 @@ def query_trajectories(start_date, end_date): for col in constants.EXCLUDED_TRAJECTORIES_COLS: if col in df.columns: df.drop(columns= [col], inplace=True) - df['data.mode'] = df['data.mode'].apply(lambda x: ecwm.PredictedModeTypes(x).name if x in set(enum.value for enum in ecwm.PredictedModeTypes) else 'UNKNOWN') + df['data.mode_str'] = df['data.mode'].apply(lambda x: ecwm.MotionTypes(x).name if x in set(enum.value for enum in ecwm.MotionTypes) else 'UNKNOWN') return df @@ -202,4 +204,4 @@ def add_user_stats(user_data): if last_call != -1: user['last_call'] = arrow.get(last_call).format(time_format) - return user_data \ No newline at end of file + 
return user_data From b51928a2452d4995c0e26ecb845142febec51fb3 Mon Sep 17 00:00:00 2001 From: Shankari Date: Thu, 5 Oct 2023 19:59:42 -0700 Subject: [PATCH 19/63] Re-load the demographic table on refresh instead of only on dashboard init This fixes https://github.com/e-mission/e-mission-docs/issues/1000 Per https://github.com/e-mission/e-mission-docs/issues/1000#issuecomment-1749910030 Before this fix, while we had `update_store_demographics`, just like `update_store_uuids`, we create the demographic_data once and pass it in. while we initialize the UUIDs to empty and update them when the data, start_date or end_date change. Since there was no callback for the demographic data, it is treated as a static dataset. The fix was as simple as initializing a callback for it as well. --- app_sidebar_collapsible.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index 626cf12..6757ebb 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -157,7 +157,12 @@ content, ] -def update_store_demographics(): +@app.callback( + Output("store-demographics", "data"), + Input('date-picker', 'start_date'), + Input('date-picker', 'end_date'), +) +def update_store_demographics(start_date, end_date): df = query_demographics() records = df.to_dict("records") @@ -167,15 +172,12 @@ def update_store_demographics(): } return store -demographics_data = update_store_demographics() - - app.layout = html.Div( [ dcc.Location(id='url', refresh=False), dcc.Store(id='store-trips', data={}), dcc.Store(id='store-uuids', data={}), - dcc.Store(id='store-demographics', data= demographics_data), + dcc.Store(id='store-demographics', data= {}), dcc.Store(id ='store-trajectories', data = {}), html.Div(id='page-content', children=home_page), ] From f3f7e278967b530235fe59321f27d2ca44f826c0 Mon Sep 17 00:00:00 2001 From: Shankari Date: Fri, 6 Oct 2023 22:01:48 -0700 Subject: [PATCH 20/63] Bump up the base 
image to address vulnerabilities --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 8ff8dab..9f94b8b 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM shankari/e-mission-server:gis-based-mode-detection_2023-09-13--30-28 +FROM shankari/e-mission-server:master_2023-10-07--40-27 ENV DASH_DEBUG_MODE True ENV SERVER_PORT 8050 From 670f83e02c4d9e786fc3359cc3c071c279a2fe94 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 7 Oct 2023 05:05:57 +0000 Subject: [PATCH 21/63] Bump pillow from 9.5.0 to 10.0.1 Bumps [pillow](https://github.com/python-pillow/Pillow) from 9.5.0 to 10.0.1. - [Release notes](https://github.com/python-pillow/Pillow/releases) - [Changelog](https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst) - [Commits](https://github.com/python-pillow/Pillow/compare/9.5.0...10.0.1) --- updated-dependencies: - dependency-name: pillow dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6f1bce8..4354127 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ dash_extensions==0.1.13 #dashboard_setup/nrel_dash_components-0.0.1.tar.gz # for docker-compose pylint==2.17.2 qrcode==7.4.2 -pillow==9.5.0 +pillow==10.0.1 requests==2.28.2 python-jose==3.3.0 flask==2.2.5 From cd4e4a1aa6fecfdd58c639e878e105baf60bb06f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 7 Oct 2023 05:08:02 +0000 Subject: [PATCH 22/63] Bump requests from 2.28.2 to 2.31.0 Bumps [requests](https://github.com/psf/requests) from 2.28.2 to 2.31.0. 
- [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.28.2...v2.31.0) --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4354127..fd2583c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ dash_extensions==0.1.13 pylint==2.17.2 qrcode==7.4.2 pillow==10.0.1 -requests==2.28.2 +requests==2.31.0 python-jose==3.3.0 flask==2.2.5 flask-talisman==1.0.0 From 5cfc0446dece672172c1565d0ab72433c68ddb1b Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Wed, 18 Oct 2023 22:17:25 -0700 Subject: [PATCH 23/63] fixed issue with date filter for trajectory table --- utils/db_utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index 41f3227..d4f6e53 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -140,14 +140,14 @@ def query_trajectories(start_date, end_date): for col in df.columns: if df[col].dtype == 'object': df[col] = df[col].apply(str) - columns_to_drop = [col for col in df.columns if col.startswith("metadata")] - df.drop(columns= columns_to_drop, inplace=True) - modified_columns = perm_utils.get_trajectories_columns(df.columns) - df.columns = modified_columns - for col in constants.EXCLUDED_TRAJECTORIES_COLS: - if col in df.columns: - df.drop(columns= [col], inplace=True) - df['data.mode_str'] = df['data.mode'].apply(lambda x: ecwm.MotionTypes(x).name if x in set(enum.value for enum in ecwm.MotionTypes) else 'UNKNOWN') + columns_to_drop = [col for col in df.columns if col.startswith("metadata")] + df.drop(columns= columns_to_drop, inplace=True) + modified_columns = 
perm_utils.get_trajectories_columns(df.columns) + df.columns = modified_columns + for col in constants.EXCLUDED_TRAJECTORIES_COLS: + if col in df.columns: + df.drop(columns= [col], inplace=True) + df['data.mode_str'] = df['data.mode'].apply(lambda x: ecwm.MotionTypes(x).name if x in set(enum.value for enum in ecwm.MotionTypes) else 'UNKNOWN') return df From b0052f5a9c6e033d583658376054b96f95e5482f Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Fri, 20 Oct 2023 17:14:59 -0700 Subject: [PATCH 24/63] added some log statements to demographic table --- utils/db_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/db_utils.py b/utils/db_utils.py index d4f6e53..7265f2e 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -119,6 +119,9 @@ def query_demographics(): for col in constants.EXCLUDED_DEMOGRAPHICS_COLS: if col in df.columns: df.drop(columns= [col], inplace=True) + logging.debug("After modifying, df columns are %s" % df.columns) + logging.debug("The data in column is %s" %df.head(5)) + logging.debug("The data in column is %s" %df["Which_one_below_describe_you_b"]) return df From 6356c7bb502bdc736c1421a2c1bdda15ec7ef673 Mon Sep 17 00:00:00 2001 From: Shankari Date: Sat, 21 Oct 2023 07:27:32 -0700 Subject: [PATCH 25/63] Fix current vulnerabilities --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 9f94b8b..0b409b5 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM shankari/e-mission-server:master_2023-10-07--40-27 +FROM shankari/e-mission-server:master_2023-10-20--52-49 ENV DASH_DEBUG_MODE True ENV SERVER_PORT 8050 From fc546da31612509d177a0f74dd9c534067f8d4da Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Mon, 23 Oct 2023 17:12:53 -0700 Subject: [PATCH 26/63] fixed trajectory table issue --- pages/data.py | 3 ++- utils/db_utils.py | 2 -- 2 
files changed, 2 insertions(+), 3 deletions(-) diff --git a/pages/data.py b/pages/data.py index e4f220e..0986fbe 100644 --- a/pages/data.py +++ b/pages/data.py @@ -88,7 +88,8 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj store_trajectories = update_store_trajectories(start_date_obj,end_date_obj) data = store_trajectories["data"] if data: - columns = list(data[0].keys()) + columns = set(data[0].keys()) + columns = perm_utils.get_trajectories_columns(columns) has_perm = perm_utils.has_permission('data_trajectories') df = pd.DataFrame(data) diff --git a/utils/db_utils.py b/utils/db_utils.py index 7265f2e..d43d2d8 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -145,8 +145,6 @@ def query_trajectories(start_date, end_date): df[col] = df[col].apply(str) columns_to_drop = [col for col in df.columns if col.startswith("metadata")] df.drop(columns= columns_to_drop, inplace=True) - modified_columns = perm_utils.get_trajectories_columns(df.columns) - df.columns = modified_columns for col in constants.EXCLUDED_TRAJECTORIES_COLS: if col in df.columns: df.drop(columns= [col], inplace=True) From 57915cffd0d4a03762f4360917bf6d7c3c0179c1 Mon Sep 17 00:00:00 2001 From: "Mahadik, Mukul Chandrakant" Date: Tue, 24 Oct 2023 23:11:51 -0700 Subject: [PATCH 27/63] Update Docker image tag Fixed base image for server after addressing latest vulnerabilities. Updated docker image tag so dashboard can pull from the latest base server image. 
--- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 0b409b5..e311b5e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM shankari/e-mission-server:master_2023-10-20--52-49 +FROM shankari/e-mission-server:master_2023-10-25--24-33 ENV DASH_DEBUG_MODE True ENV SERVER_PORT 8050 From 1d52646f1ddf958fb49170bf1366e0da3f53f97c Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Wed, 25 Oct 2023 22:58:54 -0700 Subject: [PATCH 28/63] fixing trajectory table issue --- pages/data.py | 2 +- utils/permissions.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pages/data.py b/pages/data.py index 0986fbe..413d8fa 100644 --- a/pages/data.py +++ b/pages/data.py @@ -88,7 +88,7 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj store_trajectories = update_store_trajectories(start_date_obj,end_date_obj) data = store_trajectories["data"] if data: - columns = set(data[0].keys()) + columns = list(data[0].keys()) columns = perm_utils.get_trajectories_columns(columns) has_perm = perm_utils.has_permission('data_trajectories') diff --git a/utils/permissions.py b/utils/permissions.py index 50e3a41..a5304a4 100644 --- a/utils/permissions.py +++ b/utils/permissions.py @@ -99,6 +99,7 @@ def get_demographic_columns(columns): return columns def get_trajectories_columns(columns): + columns = set(columns) for column in permissions.get("data_trajectories_columns_exclude", []): columns.discard(column) return columns From be6beb41e9b336a4a6f2ca898859b127153fc60f Mon Sep 17 00:00:00 2001 From: "K. 
Shankari" Date: Fri, 3 Nov 2023 20:54:01 -0700 Subject: [PATCH 29/63] Revert "added some log statements to demographic table" --- utils/db_utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index d43d2d8..30eefe2 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -119,9 +119,6 @@ def query_demographics(): for col in constants.EXCLUDED_DEMOGRAPHICS_COLS: if col in df.columns: df.drop(columns= [col], inplace=True) - logging.debug("After modifying, df columns are %s" % df.columns) - logging.debug("The data in column is %s" %df.head(5)) - logging.debug("The data in column is %s" %df["Which_one_below_describe_you_b"]) return df From d526c5bd9964e0b3065ffb91fd3b17fcda58f286 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 28 Nov 2023 19:36:13 -0800 Subject: [PATCH 30/63] load only last 7 days data for trip and trajectory table at beginning. --- app_sidebar_collapsible.py | 10 +++++++--- pages/data.py | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index 6757ebb..c416d3a 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -10,7 +10,7 @@ For more details on building multi-page Dash applications, check out the Dash documentation: https://dash.plot.ly/urls """ import os -from datetime import date +from datetime import date, timedelta import dash import dash_bootstrap_components as dbc @@ -208,8 +208,12 @@ def update_store_uuids(start_date, end_date): Input('date-picker', 'end_date'), ) def update_store_trips(start_date, end_date): - start_date_obj = date.fromisoformat(start_date) if start_date else None - end_date_obj = date.fromisoformat(end_date) if end_date else None + if not start_date or not end_date: + end_date_obj = date.today() + start_date_obj = end_date_obj - timedelta(days=7) + else: + start_date_obj = date.fromisoformat(start_date) + end_date_obj =
date.fromisoformat(end_date) df = query_confirmed_trips(start_date_obj, end_date_obj) records = df.to_dict("records") # logging.debug("returning records %s" % records[0:2]) diff --git a/pages/data.py b/pages/data.py index 413d8fa..40600ee 100644 --- a/pages/data.py +++ b/pages/data.py @@ -4,7 +4,7 @@ The workaround is to check if the input value is None. """ from dash import dcc, html, Input, Output, callback, register_page, dash_table -from datetime import date +from datetime import date, timedelta # Etc import logging import pandas as pd @@ -82,8 +82,12 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj elif tab == 'tab-trajectories-datatable': # Currently store_trajectories data is loaded only when the respective tab is selected #Here we query for trajectory data once "Trajectories" tab is selected - start_date_obj = date.fromisoformat(start_date) if start_date else None - end_date_obj = date.fromisoformat(end_date) if end_date else None + if not start_date or not end_date: + end_date_obj = date.today() + start_date_obj = end_date_obj - timedelta(days=7) + else: + start_date_obj = date.fromisoformat(start_date) + end_date_obj = date.fromisoformat(end_date) if store_trajectories == {}: store_trajectories = update_store_trajectories(start_date_obj,end_date_obj) data = store_trajectories["data"] From de9c9010a7543f26fbae0bcebdc3e1723da06665 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Sat, 2 Dec 2023 12:03:33 -0800 Subject: [PATCH 31/63] Changed label for trips trend --- pages/home.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pages/home.py b/pages/home.py index 3aee59a..afbf941 100644 --- a/pages/home.py +++ b/pages/home.py @@ -5,7 +5,7 @@ """ from uuid import UUID - +from datetime import date, timedelta from dash import dcc, html, Input, Output, callback, register_page import dash_bootstrap_components as dbc @@ -176,11 +176,19 @@ def 
generate_plot_sign_up_trend(store_uuids): @callback( Output('fig-trips-trend', 'figure'), Input('store-trips', 'data'), + Input('date-picker', 'start_date'), + Input('date-picker', 'end_date'), ) -def generate_plot_trips_trend(store_trips): +def generate_plot_trips_trend(store_trips, start_date, end_date): df = pd.DataFrame(store_trips.get("data")) trend_df = None + if not start_date or not end_date: + end_date_obj = date.today() + start_date_obj = end_date_obj - timedelta(days=7) + else: + start_date_obj = date.fromisoformat(start_date) + end_date_obj = date.fromisoformat(end_date) if not df.empty and has_permission('overview_trips_trend'): trend_df = compute_trips_trend(df, date_col = "trip_start_time_str") - fig = generate_barplot(trend_df, x = 'date', y = 'count', title = "Trips trend") + fig = generate_barplot(trend_df, x = 'date', y = 'count', title = f"Trips trend({start_date_obj} to {end_date_obj})") return fig From f36fea88c3846639357e35bf826ed11e58b3d099 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Sun, 3 Dec 2023 12:17:05 -0800 Subject: [PATCH 32/63] changed demographic table to display multiple survey in subtabs (#89) * changed demographic table to display multiple survey in subtabs * Changed demographic table to open the first subtab by default * added comments and done some changes in demographic table * made small change in demographic table code * Subtabs on demographics tabs will display only when permission is true. 
* Made few changes in retrieving dictionary key --- app_sidebar_collapsible.py | 5 ++-- pages/data.py | 38 +++++++++++++++++++++++++++++- utils/db_utils.py | 47 +++++++++++++++++++++++++------------- 3 files changed, 71 insertions(+), 19 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index c416d3a..d947471 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -164,8 +164,9 @@ ) def update_store_demographics(start_date, end_date): df = query_demographics() - records = df.to_dict("records") - + records = {} + for key, dataframe in df.items(): + records[key] = dataframe.to_dict("records") store = { "data": records, "length": len(records), diff --git a/pages/data.py b/pages/data.py index 40600ee..ee07857 100644 --- a/pages/data.py +++ b/pages/data.py @@ -77,8 +77,23 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj has_perm = perm_utils.has_permission('data_trips') elif tab == 'tab-demographics-datatable': data = store_demographics["data"] - columns = list(data[0].keys()) has_perm = perm_utils.has_permission('data_demographics') + # if only one survey is available, process it without creating a subtab + if len(data) == 1: + # here data is a dictionary + data = list(data.values())[0] + columns = list(data[0].keys()) + # for multiple survey, create subtabs for unique surveys + else: + #returns subtab only if has_perm is True + if not has_perm: + return None + return html.Div([ + dcc.Tabs(id='subtabs-demographics', value=list(data.keys())[0], children=[ + dcc.Tab(label= key, value= key) for key in data + ]), + html.Div(id='subtabs-demographics-content') + ]) elif tab == 'tab-trajectories-datatable': # Currently store_trajectories data is loaded only when the respective tab is selected #Here we query for trajectory data once "Trajectories" tab is selected @@ -105,7 +120,28 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj return 
populate_datatable(df) +# handle subtabs for demographic table when there are multiple surveys +@callback( + Output('subtabs-demographics-content', 'children'), + Input('subtabs-demographics', 'value'), + Input('store-demographics', 'data'), +) + +def update_sub_tab(tab, store_demographics): + data = store_demographics["data"] + if tab in data: + data = data[tab] + if data: + columns = list(data[0].keys()) + + df = pd.DataFrame(data) + if df.empty: + return None + + df = df.drop(columns=[col for col in df.columns if col not in columns]) + return populate_datatable(df) + def populate_datatable(df): if not isinstance(df, pd.DataFrame): raise PreventUpdate diff --git a/utils/db_utils.py b/utils/db_utils.py index 30eefe2..f19af33 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -102,25 +102,40 @@ def query_confirmed_trips(start_date, end_date): return df def query_demographics(): + # Returns dictionary of df where keys represent different survey id and values are df for each survey logging.debug("Querying the demographics for (no date range)") ts = esta.TimeSeries.get_aggregate_time_series() - - entries = ts.find_entries(["manual/demographic_survey"]) - df = pd.json_normalize(list(entries)) - if not df.empty: - for col in constants.BINARY_DEMOGRAPHICS_COLS: - if col in df.columns: - df[col] = df[col].apply(str) - columns_to_drop = [col for col in df.columns if col.startswith("metadata")] - df.drop(columns= columns_to_drop, inplace=True) - modified_columns = perm_utils.get_demographic_columns(df.columns) - df.columns = modified_columns - df.columns=[col.rsplit('.',1)[-1] if col.startswith('data.jsonDocResponse.') else col for col in df.columns] - for col in constants.EXCLUDED_DEMOGRAPHICS_COLS: - if col in df.columns: - df.drop(columns= [col], inplace=True) - return df + entries = ts.find_entries(["manual/demographic_survey"]) + data = list(entries) + + available_key = {} + for entry in data: + survey_key = list(entry['data']['jsonDocResponse'].keys())[0] + if
survey_key not in available_key: + available_key[survey_key] = [] + available_key[survey_key].append(entry) + + dataframes = {} + for key, json_object in available_key.items(): + df = pd.json_normalize(json_object) + dataframes[key] = df + + for key, df in dataframes.items(): + if not df.empty: + for col in constants.BINARY_DEMOGRAPHICS_COLS: + if col in df.columns: + df[col] = df[col].apply(str) + columns_to_drop = [col for col in df.columns if col.startswith("metadata")] + df.drop(columns= columns_to_drop, inplace=True) + modified_columns = perm_utils.get_demographic_columns(df.columns) + df.columns = modified_columns + df.columns=[col.rsplit('.',1)[-1] if col.startswith('data.jsonDocResponse.') else col for col in df.columns] + for col in constants.EXCLUDED_DEMOGRAPHICS_COLS: + if col in df.columns: + df.drop(columns= [col], inplace=True) + + return dataframes def query_trajectories(start_date, end_date): start_ts, end_ts = None, datetime.max.timestamp() From 2acc2ecd611fac45c7f140e8c12227c2a67789cd Mon Sep 17 00:00:00 2001 From: Shankari Date: Tue, 5 Dec 2023 21:41:42 -0800 Subject: [PATCH 33/63] Bump up the base image --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index e311b5e..6d6cb31 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM shankari/e-mission-server:master_2023-10-25--24-33 +FROM shankari/e-mission-server:master_2023-12-03--22-26 ENV DASH_DEBUG_MODE True ENV SERVER_PORT 8050 From b0f3ef293b6ab3a87c1036ff4ab5cf2fa4867100 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Mon, 11 Dec 2023 11:19:30 -0800 Subject: [PATCH 34/63] added mode specific filter for maps --- pages/map.py | 142 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 114 insertions(+), 28 deletions(-) diff --git a/pages/map.py b/pages/map.py index f668b6c..c9e0b03 100644 --- a/pages/map.py +++ b/pages/map.py @@ 
-57,19 +57,47 @@ def create_lines_map(trips_group_by_user_id, user_id_list): return fig -def create_heatmap_fig(data): +def create_heatmap_fig(trips_group_by_user_mode, user_mode_list): + coordinates = {'lat': [], 'lon': [], 'color':[]} + for user_mode in user_mode_list: + color = trips_group_by_user_mode[user_mode]['color'] + trips = trips_group_by_user_mode[user_mode]['trips'] + + for trip in trips: + coordinates['lon'].append(trip['start_coordinates'][0]) + coordinates['lon'].append(trip['end_coordinates'][0]) + coordinates['lat'].append(trip['start_coordinates'][1]) + coordinates['lat'].append(trip['end_coordinates'][1]) + coordinates['color'].extend([color,color]) + fig = go.Figure() - if len(data.get('lat', [])) > 0: + if len(coordinates.get('lat', [])) > 0: fig.add_trace( go.Densitymapbox( - lon=data['lon'], - lat=data['lat'], + lon=coordinates['lon'], + lat=coordinates['lat'], + name = '', + ) ) + + fig.add_trace( + go.Scattermapbox( + lat=coordinates['lat'], + lon=coordinates['lon'], + mode='markers', + marker=go.scattermapbox.Marker( + size=9, + color=coordinates['color'], + ), + name = '', + ) + ) + fig.update_layout( mapbox_style='open-street-map', - mapbox_center_lon=data['lon'][0], - mapbox_center_lat=data['lat'][0], + mapbox_center_lon=coordinates['lon'][0], + mapbox_center_lat=coordinates['lat'][0], mapbox_zoom=11, margin={"r": 0, "t": 30, "l": 0, "b": 0}, height=650, @@ -77,25 +105,37 @@ def create_heatmap_fig(data): return fig -def create_bubble_fig(data): +def create_bubble_fig(trips_group_by_user_mode, user_mode_list): + coordinates = {'lat': [], 'lon': [], 'color': []} + for user_mode in user_mode_list: + color = trips_group_by_user_mode[user_mode]['color'] + trips = trips_group_by_user_mode[user_mode]['trips'] + + for trip in trips: + coordinates['lon'].append(trip['start_coordinates'][0]) + coordinates['lon'].append(trip['end_coordinates'][0]) + coordinates['lat'].append(trip['start_coordinates'][1]) + 
coordinates['lat'].append(trip['end_coordinates'][1]) + coordinates['color'].extend([color,color]) + fig = go.Figure() - if len(data.get('lon', [])) > 0: + if len(coordinates.get('lon', [])) > 0: fig.add_trace( go.Scattermapbox( - lat=data['lat'], - lon=data['lon'], + lat=coordinates['lat'], + lon=coordinates['lon'], mode='markers', marker=go.scattermapbox.Marker( size=9, - color='royalblue', + color=coordinates['color'], ), ) ) fig.update_layout( autosize=True, mapbox_style='open-street-map', - mapbox_center_lon=data['lon'][0], - mapbox_center_lat=data['lat'][0], + mapbox_center_lon=coordinates['lon'][0], + mapbox_center_lat=coordinates['lat'][0], mapbox_zoom=11, mapbox_bearing=0, margin={'r': 0, 't': 30, 'l': 0, 'b': 0}, @@ -111,6 +151,15 @@ def get_trips_group_by_user_id(trips_data): trips_group_by_user_id = trips_df.groupby('user_id') return trips_group_by_user_id +def get_trips_group_by_user_mode(trips_data): + trips_group_by_user_mode = None + trips_df = pd.DataFrame(trips_data['data']) + data_types = trips_df.dtypes + trips_df['data.user_input.mode_confirm'] = trips_df['data.user_input.mode_confirm'].fillna('Unknown') + if not trips_df.empty: + trips_group_by_user_mode = trips_df.groupby('data.user_input.mode_confirm') + return trips_group_by_user_mode + def create_single_option(value, color): return { 'label': html.Span( @@ -148,6 +197,15 @@ def create_user_emails_options(trips_group_by_user_id): options.append(create_single_option(user_email, color)) return options, user_emails +def create_user_modes_options(trips_group_by_user_mode): + options = list() + user_modes = set() + for user_mode in trips_group_by_user_mode: + color = trips_group_by_user_mode[user_mode]['color'] + user_modes.add(user_mode) + options.append(create_single_option(user_mode, color)) + return options, user_modes + map_type_options = [] if has_permission('map_heatmap'): map_type_options.append({'label': 'Density Heatmap', 'value': 'heatmap'}) @@ -182,7 +240,11 @@ def 
create_user_emails_options(trips_group_by_user_id): dbc.Col([ html.Label('User Emails'), dcc.Dropdown(id='user-email-dropdown', multi=True), - ], style={'display': 'block' if has_permission('options_emails') else 'none'}) + ], style={'display': 'block' if has_permission('options_emails') else 'none'}), + dbc.Col([ + html.Label('Modes'), + dcc.Dropdown(id='user-mode-dropdown', multi=True), + ], style={'display': 'block'}) ]), dbc.Row( @@ -198,7 +260,7 @@ def create_user_emails_options(trips_group_by_user_id): Input('user-id-dropdown', 'value'), ) def update_user_ids_options(trips_data, selected_user_ids): - user_ids_options, user_ids = create_user_ids_options(trips_data['users_data']) + user_ids_options, user_ids = create_user_ids_options(trips_data[0]['users_data']) if selected_user_ids is not None: selected_user_ids = [user_id for user_id in selected_user_ids if user_id in user_ids] return user_ids_options, selected_user_ids @@ -211,31 +273,48 @@ def update_user_ids_options(trips_data, selected_user_ids): Input('user-email-dropdown', 'value'), ) def update_user_emails_options(trips_data, selected_user_emails): - user_emails_options, user_emails = create_user_emails_options(trips_data['users_data']) + user_emails_options, user_emails = create_user_emails_options(trips_data[0]['users_data']) if selected_user_emails is not None: selected_user_emails = [user_email for user_email in selected_user_emails if user_email in user_emails] return user_emails_options, selected_user_emails +@callback( + Output('user-mode-dropdown', 'options'), + Output('user-mode-dropdown', 'value'), + Input('store-trips-map', 'data'), + Input('user-mode-dropdown', 'value'), +) +def update_user_modes_options(trips_data, selected_user_modes): + user_modes_options, user_modes = create_user_modes_options(trips_data[1]['users_data']) + if selected_user_modes is not None: + selected_user_modes = [mode_confirm for mode_confirm in selected_user_modes if mode_confirm in user_modes] + return 
user_modes_options, selected_user_modes @callback( Output('trip-map', 'figure'), Input('map-type-dropdown', 'value'), Input('user-id-dropdown', 'value'), Input('user-email-dropdown', 'value'), + Input('user-mode-dropdown', 'value'), State('store-trips-map', 'data'), ) -def update_output(map_type, selected_user_ids, selected_user_emails, trips_data): +def update_output(map_type, selected_user_ids, selected_user_emails, selected_user_modes, trips_data): user_ids = set(selected_user_ids) if selected_user_ids is not None else set() + user_modes=set(selected_user_modes) if selected_user_modes is not None else set() if selected_user_emails is not None: for user_email in selected_user_emails: user_ids.add(str(ecwu.User.fromEmail(user_email).uuid)) - + arg1 = trips_data[0].get('users_data', {}) + arg2 = user_ids + if(selected_user_modes): + arg1 = trips_data[1].get('users_data', {}) + arg2 = user_modes if map_type == 'lines': - return create_lines_map(trips_data.get('users_data', {}), user_ids) + return create_lines_map(arg1, arg2) elif map_type == 'heatmap': - return create_heatmap_fig(trips_data.get('coordinates', {})) + return create_heatmap_fig(trips_data[1].get('users_data', {}), user_modes) elif map_type == 'bubble': - return create_bubble_fig(trips_data.get('coordinates', {})) + return create_bubble_fig(trips_data[1].get('users_data', {}), user_modes) else: return go.Figure() @@ -259,7 +338,6 @@ def control_user_dropdowns(map_type): def store_trips_map_data(trips_data): trips_group_by_user_id = get_trips_group_by_user_id(trips_data) users_data = dict() - coordinates = {'lat': [], 'lon': []} if trips_group_by_user_id: user_ids = list(trips_group_by_user_id) n = len(user_ids) % 360 @@ -268,9 +346,17 @@ def store_trips_map_data(trips_data): color = f'hsl({ind * k}, 100%, 50%)' trips = trips_group_by_user_id.get_group(user_id).sort_values('trip_start_time_str').to_dict("records") users_data[user_id] = {'color': color, 'trips': trips} - for trip in trips: - 
coordinates['lon'].append(trip['start_coordinates'][0]) - coordinates['lon'].append(trip['end_coordinates'][0]) - coordinates['lat'].append(trip['start_coordinates'][1]) - coordinates['lat'].append(trip['end_coordinates'][1]) - return {'users_data': users_data, 'coordinates': coordinates} + groupped_data = [] + groupped_data.append({'users_data':users_data}) + users_data = dict() + trips_group_by_user_mode = get_trips_group_by_user_mode(trips_data) + if trips_group_by_user_mode: + user_modes = list(trips_group_by_user_mode) + n = len(user_modes) % 360 + k = 359 // (n - 1) if n > 1 else 0 + for ind, user_mode in enumerate(trips_group_by_user_mode.groups.keys()): + color = f'hsl({ind * k}, 100%, 50%)' + trips = trips_group_by_user_mode.get_group(user_mode).sort_values('trip_start_time_str').to_dict("records") + users_data[user_mode] = {'color': color, 'trips': trips} + groupped_data.append({'users_data':users_data}) + return groupped_data From 8064ce454adbcac5f059b420df4a48ad5f3a931f Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Wed, 13 Dec 2023 19:40:10 -0800 Subject: [PATCH 35/63] Remove unnecessary lines from code --- pages/map.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pages/map.py b/pages/map.py index c9e0b03..d0f0935 100644 --- a/pages/map.py +++ b/pages/map.py @@ -154,9 +154,8 @@ def get_trips_group_by_user_id(trips_data): def get_trips_group_by_user_mode(trips_data): trips_group_by_user_mode = None trips_df = pd.DataFrame(trips_data['data']) - data_types = trips_df.dtypes - trips_df['data.user_input.mode_confirm'] = trips_df['data.user_input.mode_confirm'].fillna('Unknown') if not trips_df.empty: + trips_df['data.user_input.mode_confirm'] = trips_df['data.user_input.mode_confirm'].fillna('Unknown') trips_group_by_user_mode = trips_df.groupby('data.user_input.mode_confirm') return trips_group_by_user_mode From 572937deeb9cfc85da9425e778b4134a19a4d042 Mon Sep 17 00:00:00 2001 From: 
Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Sun, 17 Dec 2023 21:39:44 -0800 Subject: [PATCH 36/63] added few changes in map filter --- pages/map.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pages/map.py b/pages/map.py index d0f0935..7341654 100644 --- a/pages/map.py +++ b/pages/map.py @@ -322,11 +322,14 @@ def update_output(map_type, selected_user_ids, selected_user_emails, selected_us Output('user-id-dropdown', 'disabled'), Output('user-email-dropdown', 'disabled'), Input('map-type-dropdown', 'value'), + Input('user-mode-dropdown', 'value'), ) -def control_user_dropdowns(map_type): +def control_user_dropdowns(map_type,selected_user_modes): disabled = True if map_type == 'lines': disabled = False + if selected_user_modes: + disabled = True return disabled, disabled From 6c66d5c005b43b57976c06117837dc39ffe9ac9b Mon Sep 17 00:00:00 2001 From: "Mahadik, Mukul Chandrakant" Date: Wed, 20 Dec 2023 16:24:00 -0700 Subject: [PATCH 37/63] Bumped up Docker image tag Bumped up latest server Docker image tag. --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 6d6cb31..f8c5c54 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM shankari/e-mission-server:master_2023-12-03--22-26 +FROM shankari/e-mission-server:master_2023-12-20--31-29 ENV DASH_DEBUG_MODE True ENV SERVER_PORT 8050 From 4659be01dc054bc693ebe4e3ce57c8641d4c5f53 Mon Sep 17 00:00:00 2001 From: "Mahadik, Mukul Chandrakant" Date: Thu, 21 Dec 2023 19:21:57 -0700 Subject: [PATCH 38/63] Updated Docker image tag Latest docker image tag added after urllib3, flask vulnerability fixes. 
--- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index f8c5c54..83c8598 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM shankari/e-mission-server:master_2023-12-20--31-29 +FROM shankari/e-mission-server:master_2023-12-22--47-46 ENV DASH_DEBUG_MODE True ENV SERVER_PORT 8050 From afa8d6031da25fd0fb3c8dbb658e6b039fa01ad6 Mon Sep 17 00:00:00 2001 From: "Mahadik, Mukul Chandrakant" Date: Thu, 21 Dec 2023 21:38:58 -0700 Subject: [PATCH 39/63] Updated Docker image tag. Latest Docker image tag added. --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 83c8598..3e37b1d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM shankari/e-mission-server:master_2023-12-22--47-46 +FROM shankari/e-mission-server:master_2023-12-22--17-36 ENV DASH_DEBUG_MODE True ENV SERVER_PORT 8050 From 31650b3ba1c9cc6ee32fa50d4dc0327632cab1a3 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Wed, 10 Jan 2024 08:57:54 -0800 Subject: [PATCH 40/63] cleaned and made changes in previous code. 
--- pages/map.py | 78 +++++++++++++++++++++------------------------------- 1 file changed, 32 insertions(+), 46 deletions(-) diff --git a/pages/map.py b/pages/map.py index 7341654..9d8f4e4 100644 --- a/pages/map.py +++ b/pages/map.py @@ -57,7 +57,7 @@ def create_lines_map(trips_group_by_user_id, user_id_list): return fig -def create_heatmap_fig(trips_group_by_user_mode, user_mode_list): +def get_map_coordinates(trips_group_by_user_mode, user_mode_list): coordinates = {'lat': [], 'lon': [], 'color':[]} for user_mode in user_mode_list: color = trips_group_by_user_mode[user_mode]['color'] @@ -69,7 +69,11 @@ def create_heatmap_fig(trips_group_by_user_mode, user_mode_list): coordinates['lat'].append(trip['start_coordinates'][1]) coordinates['lat'].append(trip['end_coordinates'][1]) coordinates['color'].extend([color,color]) + return coordinates + +def create_heatmap_fig(trips_group_by_user_mode, user_mode_list): + coordinates = get_map_coordinates(trips_group_by_user_mode, user_mode_list) fig = go.Figure() if len(coordinates.get('lat', [])) > 0: fig.add_trace( @@ -106,18 +110,7 @@ def create_heatmap_fig(trips_group_by_user_mode, user_mode_list): def create_bubble_fig(trips_group_by_user_mode, user_mode_list): - coordinates = {'lat': [], 'lon': [], 'color': []} - for user_mode in user_mode_list: - color = trips_group_by_user_mode[user_mode]['color'] - trips = trips_group_by_user_mode[user_mode]['trips'] - - for trip in trips: - coordinates['lon'].append(trip['start_coordinates'][0]) - coordinates['lon'].append(trip['end_coordinates'][0]) - coordinates['lat'].append(trip['start_coordinates'][1]) - coordinates['lat'].append(trip['end_coordinates'][1]) - coordinates['color'].extend([color,color]) - + coordinates = get_map_coordinates(trips_group_by_user_mode, user_mode_list) fig = go.Figure() if len(coordinates.get('lon', [])) > 0: fig.add_trace( @@ -259,7 +252,7 @@ def create_user_modes_options(trips_group_by_user_mode): Input('user-id-dropdown', 'value'), ) def 
update_user_ids_options(trips_data, selected_user_ids): - user_ids_options, user_ids = create_user_ids_options(trips_data[0]['users_data']) + user_ids_options, user_ids = create_user_ids_options(trips_data['users_data_by_user_id']) if selected_user_ids is not None: selected_user_ids = [user_id for user_id in selected_user_ids if user_id in user_ids] return user_ids_options, selected_user_ids @@ -272,7 +265,7 @@ def update_user_ids_options(trips_data, selected_user_ids): Input('user-email-dropdown', 'value'), ) def update_user_emails_options(trips_data, selected_user_emails): - user_emails_options, user_emails = create_user_emails_options(trips_data[0]['users_data']) + user_emails_options, user_emails = create_user_emails_options(trips_data['users_data_by_user_id']) if selected_user_emails is not None: selected_user_emails = [user_email for user_email in selected_user_emails if user_email in user_emails] return user_emails_options, selected_user_emails @@ -284,7 +277,7 @@ def update_user_emails_options(trips_data, selected_user_emails): Input('user-mode-dropdown', 'value'), ) def update_user_modes_options(trips_data, selected_user_modes): - user_modes_options, user_modes = create_user_modes_options(trips_data[1]['users_data']) + user_modes_options, user_modes = create_user_modes_options(trips_data['users_data_by_user_mode']) if selected_user_modes is not None: selected_user_modes = [mode_confirm for mode_confirm in selected_user_modes if mode_confirm in user_modes] return user_modes_options, selected_user_modes @@ -303,17 +296,14 @@ def update_output(map_type, selected_user_ids, selected_user_emails, selected_us if selected_user_emails is not None: for user_email in selected_user_emails: user_ids.add(str(ecwu.User.fromEmail(user_email).uuid)) - arg1 = trips_data[0].get('users_data', {}) - arg2 = user_ids - if(selected_user_modes): - arg1 = trips_data[1].get('users_data', {}) - arg2 = user_modes if map_type == 'lines': - return create_lines_map(arg1, arg2) + if 
selected_user_modes: + return create_lines_map(trips_data.get('users_data_by_user_mode', {}), user_modes) + return create_lines_map(trips_data.get('users_data_by_user_id', {}), user_ids) elif map_type == 'heatmap': - return create_heatmap_fig(trips_data[1].get('users_data', {}), user_modes) + return create_heatmap_fig(trips_data.get('users_data_by_user_mode', {}), user_modes) elif map_type == 'bubble': - return create_bubble_fig(trips_data[1].get('users_data', {}), user_modes) + return create_bubble_fig(trips_data.get('users_data_by_user_mode', {}), user_modes) else: return go.Figure() @@ -333,32 +323,28 @@ def control_user_dropdowns(map_type,selected_user_modes): return disabled, disabled +def process_trips_group(trips_group): + users_data = dict() + if trips_group: + keys = list(trips_group) + n = len(keys) % 360 + k = 359 // (n - 1) if n > 1 else 0 + for ind, key in enumerate(trips_group.groups.keys()): + color = f'hsl({ind * k}, 100%, 50%)' + trips = trips_group.get_group(key).sort_values('trip_start_time_str').to_dict("records") + users_data[key] = {'color': color, 'trips': trips} + return users_data + + @callback( Output('store-trips-map', 'data'), Input('store-trips', 'data'), ) def store_trips_map_data(trips_data): trips_group_by_user_id = get_trips_group_by_user_id(trips_data) - users_data = dict() - if trips_group_by_user_id: - user_ids = list(trips_group_by_user_id) - n = len(user_ids) % 360 - k = 359 // (n - 1) if n > 1 else 0 - for ind, user_id in enumerate(trips_group_by_user_id.groups.keys()): - color = f'hsl({ind * k}, 100%, 50%)' - trips = trips_group_by_user_id.get_group(user_id).sort_values('trip_start_time_str').to_dict("records") - users_data[user_id] = {'color': color, 'trips': trips} - groupped_data = [] - groupped_data.append({'users_data':users_data}) - users_data = dict() + users_data_by_user_id = process_trips_group(trips_group_by_user_id) + trips_group_by_user_mode = get_trips_group_by_user_mode(trips_data) - if trips_group_by_user_mode: 
- user_modes = list(trips_group_by_user_mode) - n = len(user_modes) % 360 - k = 359 // (n - 1) if n > 1 else 0 - for ind, user_mode in enumerate(trips_group_by_user_mode.groups.keys()): - color = f'hsl({ind * k}, 100%, 50%)' - trips = trips_group_by_user_mode.get_group(user_mode).sort_values('trip_start_time_str').to_dict("records") - users_data[user_mode] = {'color': color, 'trips': trips} - groupped_data.append({'users_data':users_data}) - return groupped_data + users_data_by_user_mode = process_trips_group(trips_group_by_user_mode) + + return {'users_data_by_user_id':users_data_by_user_id, 'users_data_by_user_mode':users_data_by_user_mode} \ No newline at end of file From 9ddc5e265ecbadc7aa68bf3206481053ed2674c8 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Wed, 17 Jan 2024 23:19:49 -0500 Subject: [PATCH 41/63] set initial date range on datepicker On the initial load, the user has not selected a date and we fallback to only showing the last week of data. The datepicker should convey this information rather than showing blank initially. This change sets initial start and end date values to the datepicker. As a result, we will no longer need extra checks for the initial condition when start date or end date have not been set yet. This simplifies the code in a few places. 
--- app_sidebar_collapsible.py | 12 ++++-------- pages/data.py | 8 ++------ pages/home.py | 8 ++------ 3 files changed, 8 insertions(+), 20 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index d947471..c9f6428 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -131,8 +131,8 @@ dcc.DatePickerRange( id='date-picker', display_format='D/M/Y', - start_date_placeholder_text='D/M/Y', - end_date_placeholder_text='D/M/Y', + start_date=date.today() - timedelta(days=7), + end_date=date.today(), min_date_allowed=date(2010, 1, 1), max_date_allowed=date.today(), initial_visible_month=date.today(), @@ -209,12 +209,8 @@ def update_store_uuids(start_date, end_date): Input('date-picker', 'end_date'), ) def update_store_trips(start_date, end_date): - if not start_date or not end_date: - end_date_obj = date.today() - start_date_obj = end_date_obj - timedelta(days=7) - else: - start_date_obj = date.fromisoformat(start_date) - end_date_obj = date.fromisoformat(end_date) + start_date_obj = date.fromisoformat(start_date) + end_date_obj = date.fromisoformat(end_date) df = query_confirmed_trips(start_date_obj, end_date_obj) records = df.to_dict("records") # logging.debug("returning records %s" % records[0:2]) diff --git a/pages/data.py b/pages/data.py index ee07857..f54b018 100644 --- a/pages/data.py +++ b/pages/data.py @@ -97,12 +97,8 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj elif tab == 'tab-trajectories-datatable': # Currently store_trajectories data is loaded only when the respective tab is selected #Here we query for trajectory data once "Trajectories" tab is selected - if not start_date or not end_date: - end_date_obj = date.today() - start_date_obj = end_date_obj - timedelta(days=7) - else: - start_date_obj = date.fromisoformat(start_date) - end_date_obj = date.fromisoformat(end_date) + start_date_obj = date.fromisoformat(start_date) + end_date_obj = date.fromisoformat(end_date) if 
store_trajectories == {}: store_trajectories = update_store_trajectories(start_date_obj,end_date_obj) data = store_trajectories["data"] diff --git a/pages/home.py b/pages/home.py index afbf941..45529dd 100644 --- a/pages/home.py +++ b/pages/home.py @@ -182,12 +182,8 @@ def generate_plot_sign_up_trend(store_uuids): def generate_plot_trips_trend(store_trips, start_date, end_date): df = pd.DataFrame(store_trips.get("data")) trend_df = None - if not start_date or not end_date: - end_date_obj = date.today() - start_date_obj = end_date_obj - timedelta(days=7) - else: - start_date_obj = date.fromisoformat(start_date) - end_date_obj = date.fromisoformat(end_date) + start_date_obj = date.fromisoformat(start_date) + end_date_obj = date.fromisoformat(end_date) if not df.empty and has_permission('overview_trips_trend'): trend_df = compute_trips_trend(df, date_col = "trip_start_time_str") fig = generate_barplot(trend_df, x = 'date', y = 'count', title = f"Trips trend({start_date_obj} to {end_date_obj})") From 9016c71d271148cfa770dc0e4211ccd3a6f8e4a3 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Wed, 17 Jan 2024 23:23:47 -0500 Subject: [PATCH 42/63] use unambiguous format on datepicker (D MMM Y) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consider "1/5/2024" – in the US this seems like January 5, while in most of the world it seems like May 1. Whether we did D/M/Y or M/D/Y, it'd probably cause some confusion since the project is used on an international scale. So instead of just digits, the new format will be "5 Jan 2024". Unambiguous and still pretty concise. 
--- app_sidebar_collapsible.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index c9f6428..24afdb2 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -130,7 +130,7 @@ html.Div( dcc.DatePickerRange( id='date-picker', - display_format='D/M/Y', + display_format='D MMM Y', start_date=date.today() - timedelta(days=7), end_date=date.today(), min_date_allowed=date(2010, 1, 1), From 5105ac8097edcdc5a2447f0883fab8fb61e61cf3 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Mon, 22 Jan 2024 13:21:51 -0500 Subject: [PATCH 43/63] use arrow instead of datetime This change adds the `arrow` package as a drop-in replacement for any places where `datetime` was used. The behavior should not change with this diff - this is only for code cleanliness & consistency with the server **Note: `arrow` does not have 'min' and 'max' variables like `datetime`. So, these are manually replaced. E.g. min time = 00:00, max time = 11:59, max timestamp = maximum epoch time --- app_sidebar_collapsible.py | 34 ++++++++++++++++++++-------------- pages/data.py | 12 +++++------- pages/home.py | 6 ++---- requirements.txt | 1 + utils/db_utils.py | 31 ++++++++++++++++--------------- 5 files changed, 44 insertions(+), 40 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index 24afdb2..4c60fe1 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -10,7 +10,7 @@ For more details on building multi-page Dash applications, check out the Dash documentation: https://dash.plot.ly/urls """ import os -from datetime import date, timedelta +import arrow import dash import dash_bootstrap_components as dbc @@ -124,6 +124,10 @@ className="sidebar", ) +# according to docs, DatePickerRange will accept YYYY-MM-DD format +today_date = arrow.now().format('YYYY-MM-DD') +last_week_date = arrow.now().shift(days=-7).format('YYYY-MM-DD') +tomorrow_date = 
arrow.now().shift(days=1).format('YYYY-MM-DD') content = html.Div([ # Global Date Picker @@ -131,11 +135,11 @@ dcc.DatePickerRange( id='date-picker', display_format='D MMM Y', - start_date=date.today() - timedelta(days=7), - end_date=date.today(), - min_date_allowed=date(2010, 1, 1), - max_date_allowed=date.today(), - initial_visible_month=date.today(), + start_date=last_week_date, + end_date=today_date, + min_date_allowed='2010-1-1', + max_date_allowed=tomorrow_date, + initial_visible_month=today_date, ), style={'margin': '10px 10px 0 0', 'display': 'flex', 'justify-content': 'right'} ), @@ -188,13 +192,14 @@ def update_store_demographics(start_date, end_date): # Load data stores @app.callback( Output("store-uuids", "data"), - Input('date-picker', 'start_date'), - Input('date-picker', 'end_date'), + Input('date-picker', 'start_date'), # these are ISO strings + Input('date-picker', 'end_date'), # these are ISO strings ) def update_store_uuids(start_date, end_date): - start_date_obj = date.fromisoformat(start_date) if start_date else None - end_date_obj = date.fromisoformat(end_date) if end_date else None - dff = query_uuids(start_date_obj, end_date_obj) + # trim the time part, leaving only date as YYYY-MM-DD + start_date = start_date[:10] if start_date else None + end_date = end_date[:10] if end_date else None + dff = query_uuids(start_date, end_date) records = dff.to_dict("records") store = { "data": records, @@ -209,9 +214,10 @@ def update_store_uuids(start_date, end_date): Input('date-picker', 'end_date'), ) def update_store_trips(start_date, end_date): - start_date_obj = date.fromisoformat(start_date) - end_date_obj = date.fromisoformat(end_date) - df = query_confirmed_trips(start_date_obj, end_date_obj) + # trim the time part, leaving only date as YYYY-MM-DD + start_date = start_date[:10] if start_date else None + end_date = end_date[:10] if end_date else None + df = query_confirmed_trips(start_date, end_date) records = df.to_dict("records") # 
logging.debug("returning records %s" % records[0:2]) store = { diff --git a/pages/data.py b/pages/data.py index f54b018..b7519c4 100644 --- a/pages/data.py +++ b/pages/data.py @@ -4,7 +4,6 @@ The workaround is to check if the input value is None. """ from dash import dcc, html, Input, Output, callback, register_page, dash_table -from datetime import date, timedelta # Etc import logging import pandas as pd @@ -38,10 +37,10 @@ def clean_location_data(df): df['data.end_loc.coordinates'] = df['data.end_loc.coordinates'].apply(lambda x: f'({x[0]}, {x[1]})') return df -def update_store_trajectories(start_date_obj,end_date_obj): +def update_store_trajectories(start_date: str, end_date: str): global store_trajectories - df = query_trajectories(start_date_obj,end_date_obj) - records = df.to_dict("records") + df = query_trajectories(start_date, end_date) + records = df.to_dict("records") store = { "data": records, "length": len(records), @@ -97,10 +96,9 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj elif tab == 'tab-trajectories-datatable': # Currently store_trajectories data is loaded only when the respective tab is selected #Here we query for trajectory data once "Trajectories" tab is selected - start_date_obj = date.fromisoformat(start_date) - end_date_obj = date.fromisoformat(end_date) + start_date, end_date = start_date[:10], end_date[:10] # dates as YYYY-MM-DD if store_trajectories == {}: - store_trajectories = update_store_trajectories(start_date_obj,end_date_obj) + store_trajectories = update_store_trajectories(start_date, end_date) data = store_trajectories["data"] if data: columns = list(data[0].keys()) diff --git a/pages/home.py b/pages/home.py index 45529dd..29e853d 100644 --- a/pages/home.py +++ b/pages/home.py @@ -5,7 +5,6 @@ """ from uuid import UUID -from datetime import date, timedelta from dash import dcc, html, Input, Output, callback, register_page import dash_bootstrap_components as dbc @@ -182,9 +181,8 @@ def 
generate_plot_sign_up_trend(store_uuids): def generate_plot_trips_trend(store_trips, start_date, end_date): df = pd.DataFrame(store_trips.get("data")) trend_df = None - start_date_obj = date.fromisoformat(start_date) - end_date_obj = date.fromisoformat(end_date) + start_date, end_date = start_date[:10], end_date[:10] # dates as YYYY-MM-DD if not df.empty and has_permission('overview_trips_trend'): trend_df = compute_trips_trend(df, date_col = "trip_start_time_str") - fig = generate_barplot(trend_df, x = 'date', y = 'count', title = f"Trips trend({start_date_obj} to {end_date_obj})") + fig = generate_barplot(trend_df, x = 'date', y = 'count', title = f"Trips trend({start_date} to {end_date})") return fig diff --git a/requirements.txt b/requirements.txt index fd2583c..55f3429 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,3 +14,4 @@ python-jose==3.3.0 flask==2.2.5 flask-talisman==1.0.0 dash_auth==2.0.0 +arrow==1.3.0 diff --git a/utils/db_utils.py b/utils/db_utils.py index f19af33..06eea57 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -1,5 +1,5 @@ import logging -from datetime import datetime, timezone +import arrow from uuid import UUID import arrow @@ -16,16 +16,19 @@ from utils import constants from utils import permissions as perm_utils +MAX_EPOCH_TIME = 2 ** 31 - 1 -def query_uuids(start_date, end_date): +def query_uuids(start_date: str, end_date: str): logging.debug("Querying the UUID DB for %s -> %s" % (start_date,end_date)) query = {'update_ts': {'$exists': True}} if start_date is not None: - start_time = datetime.combine(start_date, datetime.min.time()).astimezone(timezone.utc) + # have arrow create a datetime using start_date and time 00:00:00 in UTC + start_time = arrow.get(start_date).datetime query['update_ts']['$gte'] = start_time if end_date is not None: - end_time = datetime.combine(end_date, datetime.max.time()).astimezone(timezone.utc) + # have arrow create a datetime using end_date and time 23:59:59 in UTC + end_time = 
arrow.get(end_date).replace(hour=23, minute=59, second=59).datetime query['update_ts']['$lt'] = end_time projection = { @@ -49,13 +52,12 @@ def query_uuids(start_date, end_date): df.drop(columns=["uuid", "_id"], inplace=True) return df -def query_confirmed_trips(start_date, end_date): - start_ts, end_ts = None, datetime.max.timestamp() +def query_confirmed_trips(start_date: str, end_date: str): + start_ts, end_ts = None, MAX_EPOCH_TIME if start_date is not None: - start_ts = datetime.combine(start_date, datetime.min.time()).timestamp() - + start_ts = arrow.get(start_date).timestamp() if end_date is not None: - end_ts = datetime.combine(end_date, datetime.max.time()).timestamp() + end_ts = arrow.get(end_date).replace(hour=23, minute=59, second=59).timestamp() ts = esta.TimeSeries.get_aggregate_time_series() # Note to self, allow end_ts to also be null in the timequery @@ -137,15 +139,14 @@ def query_demographics(): return dataframes -def query_trajectories(start_date, end_date): - start_ts, end_ts = None, datetime.max.timestamp() +def query_trajectories(start_date: str, end_date: str): + start_ts, end_ts = None, MAX_EPOCH_TIME if start_date is not None: - start_ts = datetime.combine(start_date, datetime.min.time()).timestamp() - + start_ts = arrow.get(start_date).timestamp() if end_date is not None: - end_ts = datetime.combine(end_date, datetime.max.time()).timestamp() + end_ts = arrow.get(end_date).replace(hour=23, minute=59, second=59).timestamp() + ts = esta.TimeSeries.get_aggregate_time_series() - entries = ts.find_entries( key_list=["analysis/recreated_location"], time_query=estt.TimeQuery("data.ts", start_ts, end_ts), From 75e30c3e40273a6a5654e2bd46e0e0c8aac03325 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 23 Jan 2024 14:51:20 -0800 Subject: [PATCH 44/63] Skip sorting while processing trips group The process_trip_group function, processes a group of trips, assigns color to each group, sorts the trips 
within each group and stores the processed data in a dictionary. We are skipping sorting part as we are plotting all co-ordinates collectively and there is no advantage of sorting it here. --- pages/map.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pages/map.py b/pages/map.py index 9d8f4e4..cb5ef57 100644 --- a/pages/map.py +++ b/pages/map.py @@ -325,13 +325,14 @@ def control_user_dropdowns(map_type,selected_user_modes): def process_trips_group(trips_group): users_data = dict() + #processes a group of trips, assigns color to each group and stores the processed data in a dictionary if trips_group: keys = list(trips_group) n = len(keys) % 360 k = 359 // (n - 1) if n > 1 else 0 for ind, key in enumerate(trips_group.groups.keys()): color = f'hsl({ind * k}, 100%, 50%)' - trips = trips_group.get_group(key).sort_values('trip_start_time_str').to_dict("records") + trips = trips_group.get_group(key).to_dict("records") users_data[key] = {'color': color, 'trips': trips} return users_data From 2a55845df9bea568229b0875cd7714c495243617 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 23 Jan 2024 15:57:19 -0800 Subject: [PATCH 45/63] Pass cordinates directly to different map. 
--- pages/map.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pages/map.py b/pages/map.py index cb5ef57..de3ce58 100644 --- a/pages/map.py +++ b/pages/map.py @@ -72,8 +72,7 @@ def get_map_coordinates(trips_group_by_user_mode, user_mode_list): return coordinates -def create_heatmap_fig(trips_group_by_user_mode, user_mode_list): - coordinates = get_map_coordinates(trips_group_by_user_mode, user_mode_list) +def create_heatmap_fig(coordinates): fig = go.Figure() if len(coordinates.get('lat', [])) > 0: fig.add_trace( @@ -109,8 +108,7 @@ def create_heatmap_fig(trips_group_by_user_mode, user_mode_list): return fig -def create_bubble_fig(trips_group_by_user_mode, user_mode_list): - coordinates = get_map_coordinates(trips_group_by_user_mode, user_mode_list) +def create_bubble_fig(coordinates): fig = go.Figure() if len(coordinates.get('lon', [])) > 0: fig.add_trace( @@ -293,6 +291,7 @@ def update_user_modes_options(trips_data, selected_user_modes): def update_output(map_type, selected_user_ids, selected_user_emails, selected_user_modes, trips_data): user_ids = set(selected_user_ids) if selected_user_ids is not None else set() user_modes=set(selected_user_modes) if selected_user_modes is not None else set() + coordinates = get_map_coordinates(trips_data.get('users_data_by_user_mode', {}), user_modes) if selected_user_emails is not None: for user_email in selected_user_emails: user_ids.add(str(ecwu.User.fromEmail(user_email).uuid)) @@ -301,9 +300,9 @@ def update_output(map_type, selected_user_ids, selected_user_emails, selected_us return create_lines_map(trips_data.get('users_data_by_user_mode', {}), user_modes) return create_lines_map(trips_data.get('users_data_by_user_id', {}), user_ids) elif map_type == 'heatmap': - return create_heatmap_fig(trips_data.get('users_data_by_user_mode', {}), user_modes) + return create_heatmap_fig(coordinates) elif map_type == 'bubble': - return create_bubble_fig(trips_data.get('users_data_by_user_mode', {}), 
user_modes) + return create_bubble_fig(coordinates) else: return go.Figure() From f9a6265d5fa9c9086b3b3fce4c0c59a1b84f36a0 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Fri, 26 Jan 2024 23:18:06 -0500 Subject: [PATCH 46/63] add timezone dropdown to datepicker From https://github.com/e-mission/op-admin-dashboard/issues/73#issuecomment-1908770258 Implements a dropdown which displays below the datepicker and allows choosing between UTC vs local time as the basis for the date selection queries. So 'timezone' is passed now as an additional argument to the querying functions. --- app_sidebar_collapsible.py | 47 ++++++++++++++++++++++++++++++-------- pages/data.py | 10 ++++---- utils/db_utils.py | 30 ++++++++++++++++++------ 3 files changed, 66 insertions(+), 21 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index 4c60fe1..dd1c67b 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -131,7 +131,7 @@ content = html.Div([ # Global Date Picker - html.Div( + html.Div([ dcc.DatePickerRange( id='date-picker', display_format='D MMM Y', @@ -140,7 +140,33 @@ min_date_allowed='2010-1-1', max_date_allowed=tomorrow_date, initial_visible_month=today_date, - ), style={'margin': '10px 10px 0 0', 'display': 'flex', 'justify-content': 'right'} + ), + html.Div([ + html.Span('Query trips using: ', style={'margin-right': '10px'}), + dcc.Dropdown( + id='date-picker-timezone', + options=[ + {'label': 'UTC Time', 'value': 'utc'}, + {'label': 'My Local Timezone', 'value': 'local'}, + # {'label': 'Local Timezone of Trips', 'value': 'trips'}, + ], + value='utc', + clearable=False, + searchable=False, + style={'width': '220px'}, + ), + ], + style={'margin': '10px 10px 0 0', + 'display': 'flex', + 'justify-content': 'right', + 'align-items': 'center'}, + + ), + ], + style={'margin': '10px 10px 0 0', + 'display': 'flex', + 'flex-direction': 'column', + 'align-items': 'end'} ), # Pages Content @@ -165,8 +191,9 @@ 
Output("store-demographics", "data"), Input('date-picker', 'start_date'), Input('date-picker', 'end_date'), + Input('date-picker-timezone', 'value'), ) -def update_store_demographics(start_date, end_date): +def update_store_demographics(start_date, end_date, timezone): df = query_demographics() records = {} for key, dataframe in df.items(): @@ -192,14 +219,15 @@ def update_store_demographics(start_date, end_date): # Load data stores @app.callback( Output("store-uuids", "data"), - Input('date-picker', 'start_date'), # these are ISO strings - Input('date-picker', 'end_date'), # these are ISO strings + Input('date-picker', 'start_date'), # these are ISO strings + Input('date-picker', 'end_date'), # these are ISO strings + Input('date-picker-timezone', 'value'), ) -def update_store_uuids(start_date, end_date): +def update_store_uuids(start_date, end_date, timezone): # trim the time part, leaving only date as YYYY-MM-DD start_date = start_date[:10] if start_date else None end_date = end_date[:10] if end_date else None - dff = query_uuids(start_date, end_date) + dff = query_uuids(start_date, end_date, timezone) records = dff.to_dict("records") store = { "data": records, @@ -212,12 +240,13 @@ def update_store_uuids(start_date, end_date): Output("store-trips", "data"), Input('date-picker', 'start_date'), Input('date-picker', 'end_date'), + Input('date-picker-timezone', 'value'), ) -def update_store_trips(start_date, end_date): +def update_store_trips(start_date, end_date, timezone): # trim the time part, leaving only date as YYYY-MM-DD start_date = start_date[:10] if start_date else None end_date = end_date[:10] if end_date else None - df = query_confirmed_trips(start_date, end_date) + df = query_confirmed_trips(start_date, end_date, timezone) records = df.to_dict("records") # logging.debug("returning records %s" % records[0:2]) store = { diff --git a/pages/data.py b/pages/data.py index b7519c4..ab3a543 100644 --- a/pages/data.py +++ b/pages/data.py @@ -37,9 +37,9 @@ def 
clean_location_data(df): df['data.end_loc.coordinates'] = df['data.end_loc.coordinates'].apply(lambda x: f'({x[0]}, {x[1]})') return df -def update_store_trajectories(start_date: str, end_date: str): +def update_store_trajectories(start_date: str, end_date: str, tz: str): global store_trajectories - df = query_trajectories(start_date, end_date) + df = query_trajectories(start_date, end_date, tz) records = df.to_dict("records") store = { "data": records, @@ -58,9 +58,9 @@ def update_store_trajectories(start_date: str, end_date: str): Input('store-trajectories', 'data'), Input('date-picker', 'start_date'), Input('date-picker', 'end_date'), - + Input('date-picker-timezone', 'value'), ) -def render_content(tab, store_uuids, store_trips, store_demographics, store_trajectories, start_date, end_date): +def render_content(tab, store_uuids, store_trips, store_demographics, store_trajectories, start_date, end_date, timezone): data, columns, has_perm = None, [], False if tab == 'tab-uuids-datatable': data = store_uuids["data"] @@ -98,7 +98,7 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj #Here we query for trajectory data once "Trajectories" tab is selected start_date, end_date = start_date[:10], end_date[:10] # dates as YYYY-MM-DD if store_trajectories == {}: - store_trajectories = update_store_trajectories(start_date, end_date) + store_trajectories = update_store_trajectories(start_date, end_date, timezone) data = store_trajectories["data"] if data: columns = list(data[0].keys()) diff --git a/utils/db_utils.py b/utils/db_utils.py index 06eea57..9bc0d10 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -18,7 +18,7 @@ MAX_EPOCH_TIME = 2 ** 31 - 1 -def query_uuids(start_date: str, end_date: str): +def query_uuids(start_date: str, end_date: str, tz: str): logging.debug("Querying the UUID DB for %s -> %s" % (start_date,end_date)) query = {'update_ts': {'$exists': True}} if start_date is not None: @@ -52,12 +52,20 @@ def 
query_uuids(start_date: str, end_date: str): df.drop(columns=["uuid", "_id"], inplace=True) return df -def query_confirmed_trips(start_date: str, end_date: str): +def query_confirmed_trips(start_date: str, end_date: str, tz: str): start_ts, end_ts = None, MAX_EPOCH_TIME if start_date is not None: - start_ts = arrow.get(start_date).timestamp() + if tz == 'utc': + start_ts = arrow.get(start_date).timestamp() + elif tz == 'local': + start_ts = arrow.get(start_date, tzinfo='local').timestamp() if end_date is not None: - end_ts = arrow.get(end_date).replace(hour=23, minute=59, second=59).timestamp() + if tz == 'utc': + end_ts = arrow.get(end_date).replace( + hour=23, minute=59, second=59).timestamp() + elif tz == 'local': + end_ts = arrow.get(end_date, tzinfo='local').replace( + hour=23, minute=59, second=59).timestamp() ts = esta.TimeSeries.get_aggregate_time_series() # Note to self, allow end_ts to also be null in the timequery @@ -139,12 +147,20 @@ def query_demographics(): return dataframes -def query_trajectories(start_date: str, end_date: str): +def query_trajectories(start_date: str, end_date: str, tz: str): start_ts, end_ts = None, MAX_EPOCH_TIME if start_date is not None: - start_ts = arrow.get(start_date).timestamp() + if tz == 'utc': + start_ts = arrow.get(start_date).timestamp() + elif tz == 'local': + start_ts = arrow.get(start_date, tzinfo='local').timestamp() if end_date is not None: - end_ts = arrow.get(end_date).replace(hour=23, minute=59, second=59).timestamp() + if tz == 'utc': + end_ts = arrow.get(end_date).replace( + hour=23, minute=59, second=59).timestamp() + elif tz == 'local': + end_ts = arrow.get(end_date, tzinfo='local').replace( + hour=23, minute=59, second=59).timestamp() ts = esta.TimeSeries.get_aggregate_time_series() entries = ts.find_entries( From 746ae03560c21ef360af34c5ad27734d35646d50 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Fri, 26 Jan 2024 23:47:14 -0500 Subject: [PATCH 47/63] refactor time queries for 'query' functions I 
noticed this code is repeated and since timezone is passed as a 3rd argument 'tz', it warrants extracting to a separate function Created a function 'get_ts_range' to use in both places. --- utils/db_utils.py | 51 +++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index 9bc0d10..960b37d 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -18,6 +18,26 @@ MAX_EPOCH_TIME = 2 ** 31 - 1 +def get_ts_range(start_date: str, end_date: str, tz: str): + """ + Returns a tuple of (start_ts, end_ts) as timestamps, given start_date and end_date in ISO format + and the timezone mode in which the dates should be resolved to timestamps ('utc' or 'local') + """ + start_ts, end_ts = None, MAX_EPOCH_TIME + if start_date is not None: + if tz == 'utc': + start_ts = arrow.get(start_date).timestamp() + elif tz == 'local': + start_ts = arrow.get(start_date, tzinfo='local').timestamp() + if end_date is not None: + if tz == 'utc': + end_ts = arrow.get(end_date).replace( + hour=23, minute=59, second=59).timestamp() + elif tz == 'local': + end_ts = arrow.get(end_date, tzinfo='local').replace( + hour=23, minute=59, second=59).timestamp() + return (start_ts, end_ts) + def query_uuids(start_date: str, end_date: str, tz: str): logging.debug("Querying the UUID DB for %s -> %s" % (start_date,end_date)) query = {'update_ts': {'$exists': True}} @@ -53,20 +73,7 @@ def query_uuids(start_date: str, end_date: str, tz: str): return df def query_confirmed_trips(start_date: str, end_date: str, tz: str): - start_ts, end_ts = None, MAX_EPOCH_TIME - if start_date is not None: - if tz == 'utc': - start_ts = arrow.get(start_date).timestamp() - elif tz == 'local': - start_ts = arrow.get(start_date, tzinfo='local').timestamp() - if end_date is not None: - if tz == 'utc': - end_ts = arrow.get(end_date).replace( - hour=23, minute=59, second=59).timestamp() - elif tz == 'local': - end_ts = arrow.get(end_date, 
tzinfo='local').replace( - hour=23, minute=59, second=59).timestamp() - + (start_ts, end_ts) = get_ts_range(start_date, end_date, tz) ts = esta.TimeSeries.get_aggregate_time_series() # Note to self, allow end_ts to also be null in the timequery # we can then remove the start_time, end_time logic @@ -148,20 +155,8 @@ def query_demographics(): return dataframes def query_trajectories(start_date: str, end_date: str, tz: str): - start_ts, end_ts = None, MAX_EPOCH_TIME - if start_date is not None: - if tz == 'utc': - start_ts = arrow.get(start_date).timestamp() - elif tz == 'local': - start_ts = arrow.get(start_date, tzinfo='local').timestamp() - if end_date is not None: - if tz == 'utc': - end_ts = arrow.get(end_date).replace( - hour=23, minute=59, second=59).timestamp() - elif tz == 'local': - end_ts = arrow.get(end_date, tzinfo='local').replace( - hour=23, minute=59, second=59).timestamp() - + + (start_ts, end_ts) = get_ts_range(start_date, end_date, tz) ts = esta.TimeSeries.get_aggregate_time_series() entries = ts.find_entries( key_list=["analysis/recreated_location"], From f4c851dd602caf6675b39db585063dc149292e15 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Fri, 26 Jan 2024 23:49:40 -0500 Subject: [PATCH 48/63] comment explaining that UUIDs are not filtered As described in the code comment, this code is unused and we are not currently filtering UUIDs by datetime, so updated the code to make this clearer --- utils/db_utils.py | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index 960b37d..a7148f2 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -39,24 +39,27 @@ def get_ts_range(start_date: str, end_date: str, tz: str): return (start_ts, end_ts) def query_uuids(start_date: str, end_date: str, tz: str): - logging.debug("Querying the UUID DB for %s -> %s" % (start_date,end_date)) - query = {'update_ts': {'$exists': True}} - if start_date is not None: - # have 
arrow create a datetime using start_date and time 00:00:00 in UTC - start_time = arrow.get(start_date).datetime - query['update_ts']['$gte'] = start_time - - if end_date is not None: - # have arrow create a datetime using end_date and time 23:59:59 in UTC - end_time = arrow.get(end_date).replace(hour=23, minute=59, second=59).datetime - query['update_ts']['$lt'] = end_time - - projection = { - '_id': 0, - 'user_id': '$uuid', - 'user_token': '$user_email', - 'update_ts': 1 - } + # As of now, time filtering does not apply to UUIDs; we just query all of them. + # Vestigial code commented out and left below for future reference + + # logging.debug("Querying the UUID DB for %s -> %s" % (start_date,end_date)) + # query = {'update_ts': {'$exists': True}} + # if start_date is not None: + # # have arrow create a datetime using start_date and time 00:00:00 in UTC + # start_time = arrow.get(start_date).datetime + # query['update_ts']['$gte'] = start_time + # if end_date is not None: + # # have arrow create a datetime using end_date and time 23:59:59 in UTC + # end_time = arrow.get(end_date).replace(hour=23, minute=59, second=59).datetime + # query['update_ts']['$lt'] = end_time + # projection = { + # '_id': 0, + # 'user_id': '$uuid', + # 'user_token': '$user_email', + # 'update_ts': 1 + # } + + logging.debug("Querying the UUID DB for (no date range)") # This should actually use the profile DB instead of (or in addition to) # the UUID DB so that we can see the app version, os, manufacturer... 
From a802c908093e68899b17fb894f9c8b859efc92cb Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Sat, 27 Jan 2024 00:03:35 -0500 Subject: [PATCH 49/63] fix weird formatting This weird formatting was from vscode autoformatting (which I have now disabled for python projects) I know that Python is a language where whitespace / linebreaks can alter the function of the code so I want to be extra careful here --- utils/db_utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index a7148f2..88899ee 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -31,11 +31,9 @@ def get_ts_range(start_date: str, end_date: str, tz: str): start_ts = arrow.get(start_date, tzinfo='local').timestamp() if end_date is not None: if tz == 'utc': - end_ts = arrow.get(end_date).replace( - hour=23, minute=59, second=59).timestamp() + end_ts = arrow.get(end_date).replace(hour=23, minute=59, second=59).timestamp() elif tz == 'local': - end_ts = arrow.get(end_date, tzinfo='local').replace( - hour=23, minute=59, second=59).timestamp() + end_ts = arrow.get(end_date, tzinfo='local').replace(hour=23, minute=59, second=59).timestamp() return (start_ts, end_ts) def query_uuids(start_date: str, end_date: str, tz: str): From f7f6057c83b4f2f370f1877dd559f4b61d68ccf0 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Sun, 28 Jan 2024 00:24:47 -0500 Subject: [PATCH 50/63] add filter to exclude 'test' users 'tes' users are devs or program admininstrators whose data we usually want to keep separate from that of the participants themselves, identified by subgroup 'test' - in other words, if the opcode has "test" in it we should be able to filter out their data. A checkbox is added below the datepicker to "Exclude 'test' users" (checked by default) Filtering 'test' users from the UUIDs df is a simple task - drop any rows where 'user_token' contains 'test'. 
But the other dfs (for confirmed trips, survey responses, trajectories) do not have the 'user_token' column - only 'user_id'. So every time we want to filter these dfs, we'd have to cross-reference with the UUIDs table and lookup which 'user_id's correspond to test 'user_token's. (I think of this like a 'join' in SQL) I think a more performant solution is to keep a separate 'excluded uuids' store, which is just a plain list of excluded UUIDs that we determined while loading the 'uuids' store. The other stores can depend on this, so they will not need to depend on the entire 'uuids' store and do those cross-referencing lookups each time. --- app_sidebar_collapsible.py | 56 ++++++++++++++++++++++++++++++-------- pages/data.py | 13 +++++---- 2 files changed, 53 insertions(+), 16 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index dd1c67b..af46f31 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -160,7 +160,13 @@ 'display': 'flex', 'justify-content': 'right', 'align-items': 'center'}, - + ), + dcc.Checklist( + id='global-filters', + options=[ + {'label': 'Exclude "test" users', 'value': 'exclude-test-users'}, + ], + value=['exclude-test-users'], ), ], style={'margin': '10px 10px 0 0', @@ -192,12 +198,17 @@ Input('date-picker', 'start_date'), Input('date-picker', 'end_date'), Input('date-picker-timezone', 'value'), + Input('store-excluded-uuids', 'data'), ) -def update_store_demographics(start_date, end_date, timezone): - df = query_demographics() +def update_store_demographics(start_date, end_date, timezone, excluded_uuids): + dataframes = query_demographics() records = {} - for key, dataframe in df.items(): - records[key] = dataframe.to_dict("records") + for key, df in dataframes.items(): + if df.empty: + records[key] = [] + else: + non_excluded_df = df[~df['user_id'].isin(excluded_uuids["data"])] # filter excluded UUIDs + records[key] = non_excluded_df.to_dict("records") store = { "data": records, 
"length": len(records), @@ -209,6 +220,7 @@ def update_store_demographics(start_date, end_date, timezone): dcc.Location(id='url', refresh=False), dcc.Store(id='store-trips', data={}), dcc.Store(id='store-uuids', data={}), + dcc.Store(id='store-excluded-uuids', data={}), # if 'test' users are excluded, a list of their uuids dcc.Store(id='store-demographics', data= {}), dcc.Store(id ='store-trajectories', data = {}), html.Div(id='page-content', children=home_page), @@ -219,21 +231,39 @@ def update_store_demographics(start_date, end_date, timezone): # Load data stores @app.callback( Output("store-uuids", "data"), + Output("store-excluded-uuids", "data"), Input('date-picker', 'start_date'), # these are ISO strings Input('date-picker', 'end_date'), # these are ISO strings Input('date-picker-timezone', 'value'), + Input('global-filters', 'value'), ) -def update_store_uuids(start_date, end_date, timezone): +def update_store_uuids(start_date, end_date, timezone, filters): # trim the time part, leaving only date as YYYY-MM-DD start_date = start_date[:10] if start_date else None end_date = end_date[:10] if end_date else None dff = query_uuids(start_date, end_date, timezone) - records = dff.to_dict("records") - store = { + if dff.empty: return {"data": [], "length": 0}, {"data": [], "length": 0} + + # if 'exclude-testusers' filter is active, + # exclude any rows with user_token containing 'test', and + # output a list of those excluded UUIDs so other callbacks can exclude them too + if 'exclude-test-users' in filters: + excluded_uuids_list = dff[dff['user_token'].str.contains('test')]['user_id'].tolist() + non_excluded_dff = dff[~dff['user_id'].isin(excluded_uuids_list)] + records = non_excluded_dff.to_dict("records") + else: + excluded_uuids_list = [] + records = dff.to_dict("records") + + store_uuids = { "data": records, "length": len(records), } - return store + store_excluded_uuids = { + "data": excluded_uuids_list, + "length": len(excluded_uuids_list), + } + return 
store_uuids, store_excluded_uuids @app.callback( @@ -241,13 +271,17 @@ def update_store_uuids(start_date, end_date, timezone): Input('date-picker', 'start_date'), Input('date-picker', 'end_date'), Input('date-picker-timezone', 'value'), + Input('store-excluded-uuids', 'data'), ) -def update_store_trips(start_date, end_date, timezone): +def update_store_trips(start_date, end_date, timezone, excluded_uuids): # trim the time part, leaving only date as YYYY-MM-DD start_date = start_date[:10] if start_date else None end_date = end_date[:10] if end_date else None df = query_confirmed_trips(start_date, end_date, timezone) - records = df.to_dict("records") + if df.empty: return {"data": [], "length": 0} + + non_excluded_df = df[~df['user_id'].isin(excluded_uuids["data"])] # filter excluded UUIDs + records = non_excluded_df.to_dict("records") # logging.debug("returning records %s" % records[0:2]) store = { "data": records, diff --git a/pages/data.py b/pages/data.py index ab3a543..98a4c71 100644 --- a/pages/data.py +++ b/pages/data.py @@ -37,10 +37,12 @@ def clean_location_data(df): df['data.end_loc.coordinates'] = df['data.end_loc.coordinates'].apply(lambda x: f'({x[0]}, {x[1]})') return df -def update_store_trajectories(start_date: str, end_date: str, tz: str): +def update_store_trajectories(start_date: str, end_date: str, tz: str, excluded_uuids: list): global store_trajectories df = query_trajectories(start_date, end_date, tz) - records = df.to_dict("records") + if df.empty: return {"data": [], "length": 0} + non_excluded_df = df[~df['user_id'].isin(excluded_uuids["data"])] # filter excluded UUIDs + records = non_excluded_df.to_dict("records") store = { "data": records, "length": len(records), @@ -53,6 +55,7 @@ def update_store_trajectories(start_date: str, end_date: str, tz: str): Output('tabs-content', 'children'), Input('tabs-datatable', 'value'), Input('store-uuids', 'data'), + Input('store-excluded-uuids', 'data'), Input('store-trips', 'data'), 
Input('store-demographics', 'data'), Input('store-trajectories', 'data'), @@ -60,7 +63,7 @@ def update_store_trajectories(start_date: str, end_date: str, tz: str): Input('date-picker', 'end_date'), Input('date-picker-timezone', 'value'), ) -def render_content(tab, store_uuids, store_trips, store_demographics, store_trajectories, start_date, end_date, timezone): +def render_content(tab, store_uuids, store_excluded_uuids, store_trips, store_demographics, store_trajectories, start_date, end_date, timezone): data, columns, has_perm = None, [], False if tab == 'tab-uuids-datatable': data = store_uuids["data"] @@ -83,7 +86,7 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj data = list(data.values())[0] columns = list(data[0].keys()) # for multiple survey, create subtabs for unique surveys - else: + elif len(data) > 1: #returns subtab only if has_perm is True if not has_perm: return None @@ -98,7 +101,7 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj #Here we query for trajectory data once "Trajectories" tab is selected start_date, end_date = start_date[:10], end_date[:10] # dates as YYYY-MM-DD if store_trajectories == {}: - store_trajectories = update_store_trajectories(start_date, end_date, timezone) + store_trajectories = update_store_trajectories(start_date, end_date, timezone, store_excluded_uuids) data = store_trajectories["data"] if data: columns = list(data[0].keys()) From 4b1e29a07bc261756b0f3f77238c1c77ffd182fd Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Sun, 28 Jan 2024 00:45:39 -0500 Subject: [PATCH 51/63] refactor: df_to_filtered_records fn instead of repeating this filtering logic multiple times, let's make a reusable function that filters the dfs while converting them to 'records' dicts --- app_sidebar_collapsible.py | 23 ++++++----------------- pages/data.py | 8 +++----- utils/db_utils.py | 10 ++++++++++ 3 files changed, 19 insertions(+), 22 deletions(-) diff --git 
a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index af46f31..b430ba3 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -23,7 +23,7 @@ if os.getenv('DASH_DEBUG_MODE', 'True').lower() == 'true': logging.basicConfig(level=logging.DEBUG) -from utils.db_utils import query_uuids, query_confirmed_trips, query_demographics +from utils.db_utils import df_to_filtered_records, query_uuids, query_confirmed_trips, query_demographics from utils.permissions import has_permission import flask_talisman as flt @@ -204,11 +204,7 @@ def update_store_demographics(start_date, end_date, timezone, excluded_uuids): dataframes = query_demographics() records = {} for key, df in dataframes.items(): - if df.empty: - records[key] = [] - else: - non_excluded_df = df[~df['user_id'].isin(excluded_uuids["data"])] # filter excluded UUIDs - records[key] = non_excluded_df.to_dict("records") + records[key] = df_to_filtered_records(df, 'user_id', excluded_uuids["data"]) store = { "data": records, "length": len(records), @@ -243,18 +239,14 @@ def update_store_uuids(start_date, end_date, timezone, filters): end_date = end_date[:10] if end_date else None dff = query_uuids(start_date, end_date, timezone) if dff.empty: return {"data": [], "length": 0}, {"data": [], "length": 0} - # if 'exclude-testusers' filter is active, # exclude any rows with user_token containing 'test', and # output a list of those excluded UUIDs so other callbacks can exclude them too if 'exclude-test-users' in filters: - excluded_uuids_list = dff[dff['user_token'].str.contains('test')]['user_id'].tolist() - non_excluded_dff = dff[~dff['user_id'].isin(excluded_uuids_list)] - records = non_excluded_dff.to_dict("records") + excluded_uuids_list = dff[dff['user_token'].str.contains('test')]['user_id'].tolist() else: - excluded_uuids_list = [] - records = dff.to_dict("records") - + excluded_uuids_list = [] + records = df_to_filtered_records(dff, 'user_id', excluded_uuids_list) store_uuids = { "data": 
records, "length": len(records), @@ -278,10 +270,7 @@ def update_store_trips(start_date, end_date, timezone, excluded_uuids): start_date = start_date[:10] if start_date else None end_date = end_date[:10] if end_date else None df = query_confirmed_trips(start_date, end_date, timezone) - if df.empty: return {"data": [], "length": 0} - - non_excluded_df = df[~df['user_id'].isin(excluded_uuids["data"])] # filter excluded UUIDs - records = non_excluded_df.to_dict("records") + records = df_to_filtered_records(df, 'user_id', excluded_uuids["data"]) # logging.debug("returning records %s" % records[0:2]) store = { "data": records, diff --git a/pages/data.py b/pages/data.py index 98a4c71..c09c74f 100644 --- a/pages/data.py +++ b/pages/data.py @@ -11,7 +11,7 @@ from utils import permissions as perm_utils from utils import db_utils -from utils.db_utils import query_trajectories +from utils.db_utils import df_to_filtered_records, query_trajectories register_page(__name__, path="/data") intro = """## Data""" @@ -37,12 +37,10 @@ def clean_location_data(df): df['data.end_loc.coordinates'] = df['data.end_loc.coordinates'].apply(lambda x: f'({x[0]}, {x[1]})') return df -def update_store_trajectories(start_date: str, end_date: str, tz: str, excluded_uuids: list): +def update_store_trajectories(start_date: str, end_date: str, tz: str, excluded_uuids): global store_trajectories df = query_trajectories(start_date, end_date, tz) - if df.empty: return {"data": [], "length": 0} - non_excluded_df = df[~df['user_id'].isin(excluded_uuids["data"])] # filter excluded UUIDs - records = non_excluded_df.to_dict("records") + records = df_to_filtered_records(df, 'user_id', excluded_uuids["data"]) store = { "data": records, "length": len(records), diff --git a/utils/db_utils.py b/utils/db_utils.py index 88899ee..b24bb18 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -36,6 +36,16 @@ def get_ts_range(start_date: str, end_date: str, tz: str): end_ts = arrow.get(end_date, 
tzinfo='local').replace(hour=23, minute=59, second=59).timestamp() return (start_ts, end_ts) +def df_to_filtered_records(df, col_to_filter=None, vals_to_exclude: list[str] = []): + """ + Returns a dictionary of df records, given a dataframe, a column to filter on, + and a list of values that rows in that column will be excluded if they match + """ + if df.empty: return [] + if col_to_filter is not None: + df = df[~df[col_to_filter].isin(vals_to_exclude)] + return df.to_dict("records") + def query_uuids(start_date: str, end_date: str, tz: str): # As of now, time filtering does not apply to UUIDs; we just query all of them. # Vestigial code commented out and left below for future reference From 5d70e0552feaee8e8e5af642bb89d9590752c916 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Wed, 31 Jan 2024 14:04:11 -0500 Subject: [PATCH 52/63] skip filtering if no UUIDs are excluded This does not change the behavior but we can skip filtering if the list of values to 'exclude' is [] (or otherwise falsy). May be a small performance improvement. 
--- utils/db_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index b24bb18..70ff1c7 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -42,7 +42,7 @@ def df_to_filtered_records(df, col_to_filter=None, vals_to_exclude: list[str] = and a list of values that rows in that column will be excluded if they match """ if df.empty: return [] - if col_to_filter is not None: + if col_to_filter and vals_to_exclude: df = df[~df[col_to_filter].isin(vals_to_exclude)] return df.to_dict("records") From 87c5555afab8f53fabc58250663c4155bfa1efb8 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Wed, 31 Jan 2024 23:31:52 -0500 Subject: [PATCH 53/63] move update_store_demographics below to the other 'store' fns For better organization --- app_sidebar_collapsible.py | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index dd1c67b..3805489 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -187,23 +187,6 @@ content, ] -@app.callback( - Output("store-demographics", "data"), - Input('date-picker', 'start_date'), - Input('date-picker', 'end_date'), - Input('date-picker-timezone', 'value'), -) -def update_store_demographics(start_date, end_date, timezone): - df = query_demographics() - records = {} - for key, dataframe in df.items(): - records[key] = dataframe.to_dict("records") - store = { - "data": records, - "length": len(records), - } - return store - app.layout = html.Div( [ dcc.Location(id='url', refresh=False), @@ -236,6 +219,24 @@ def update_store_uuids(start_date, end_date, timezone): return store +@app.callback( + Output("store-demographics", "data"), + Input('date-picker', 'start_date'), + Input('date-picker', 'end_date'), + Input('date-picker-timezone', 'value'), +) +def update_store_demographics(start_date, end_date, timezone): + df = query_demographics() + records = {} + for 
key, dataframe in df.items(): + records[key] = dataframe.to_dict("records") + store = { + "data": records, + "length": len(records), + } + return store + + @app.callback( Output("store-trips", "data"), Input('date-picker', 'start_date'), From d9c7df2915431b9fd0258e4502e0bd31a2083a3e Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Fri, 2 Feb 2024 02:10:04 -0500 Subject: [PATCH 54/63] make datepicker's "today" recompute on page load fixes the issue described in https://github.com/e-mission/op-admin-dashboard/pull/96#issuecomment-1919816848 To recompute a new date on refresh, "layout" needs to be a function. This required reorganizing the page contents. Some things can stay static but the things that need to be recomputed are reorganized into "make_" functions. Inside make_controls() is where we generate the dates, and this gets called on every refresh. The resulting DOM is the same. --- app_sidebar_collapsible.py | 67 ++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index 3805489..9738a9d 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -124,14 +124,14 @@ className="sidebar", ) -# according to docs, DatePickerRange will accept YYYY-MM-DD format -today_date = arrow.now().format('YYYY-MM-DD') -last_week_date = arrow.now().shift(days=-7).format('YYYY-MM-DD') -tomorrow_date = arrow.now().shift(days=1).format('YYYY-MM-DD') - -content = html.Div([ - # Global Date Picker - html.Div([ +# Global controls including date picker and timezone selector +def make_controls(): + # according to docs, DatePickerRange will accept YYYY-MM-DD format + today_date = arrow.now().format('YYYY-MM-DD') + last_week_date = arrow.now().shift(days=-7).format('YYYY-MM-DD') + tomorrow_date = arrow.now().shift(days=1).format('YYYY-MM-DD') + return html.Div([ + # Global Date Picker dcc.DatePickerRange( id='date-picker', display_format='D MMM Y', @@ -167,37 +167,34 
@@ 'display': 'flex', 'flex-direction': 'column', 'align-items': 'end'} - ), - - # Pages Content - dcc.Loading( - type='default', - fullscreen=True, - children=html.Div(dash.page_container, style={ - "margin-left": "5rem", - "margin-right": "2rem", - "padding": "2rem 1rem", - }) - ), -]) + ) + +page_content = dcc.Loading( + type='default', + fullscreen=True, + children=html.Div(dash.page_container, style={ + "margin-left": "5rem", + "margin-right": "2rem", + "padding": "2rem 1rem", + }) +) -home_page = [ +def make_home_page(): return [ sidebar, - content, + html.Div([make_controls(), page_content]) ] -app.layout = html.Div( - [ - dcc.Location(id='url', refresh=False), - dcc.Store(id='store-trips', data={}), - dcc.Store(id='store-uuids', data={}), - dcc.Store(id='store-demographics', data= {}), - dcc.Store(id ='store-trajectories', data = {}), - html.Div(id='page-content', children=home_page), - ] -) +def make_layout(): return html.Div([ + dcc.Location(id='url', refresh=False), + dcc.Store(id='store-trips', data={}), + dcc.Store(id='store-uuids', data={}), + dcc.Store(id='store-demographics', data={}), + dcc.Store(id='store-trajectories', data={}), + html.Div(id='page-content', children=make_home_page()), +]) +app.layout = make_layout # Load data stores @app.callback( @@ -271,10 +268,10 @@ def display_page(search): return get_cognito_login_page('Unsuccessful authentication, try again.', 'red') if is_authenticated: - return home_page + return make_home_page() return get_cognito_login_page() - return home_page + return make_home_page() extra_csp_url = [ "https://raw.githubusercontent.com", From 0897cf90858a7d2687060046b26b14df7eb7564f Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Sun, 4 Feb 2024 22:58:52 -0500 Subject: [PATCH 55/63] refactor to iso_to_date_only(), create datetime_utils.py This code was repeated many times and should be extracted to a separate function. 
Did it in such a way that it accepts any number of inputs, thanks to Python's *args syntax Put this in a new file since none of the other 'utils' files had anything to do with datetime --- app_sidebar_collapsible.py | 13 +++++-------- pages/data.py | 3 ++- pages/home.py | 7 ++++--- utils/datetime_utils.py | 7 +++++++ 4 files changed, 18 insertions(+), 12 deletions(-) create mode 100644 utils/datetime_utils.py diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index 9738a9d..39699bc 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -23,6 +23,7 @@ if os.getenv('DASH_DEBUG_MODE', 'True').lower() == 'true': logging.basicConfig(level=logging.DEBUG) +from utils.datetime_utils import iso_to_date_only from utils.db_utils import query_uuids, query_confirmed_trips, query_demographics from utils.permissions import has_permission import flask_talisman as flt @@ -204,9 +205,7 @@ def make_layout(): return html.Div([ Input('date-picker-timezone', 'value'), ) def update_store_uuids(start_date, end_date, timezone): - # trim the time part, leaving only date as YYYY-MM-DD - start_date = start_date[:10] if start_date else None - end_date = end_date[:10] if end_date else None + (start_date, end_date) = iso_to_date_only(start_date, end_date) dff = query_uuids(start_date, end_date, timezone) records = dff.to_dict("records") store = { @@ -236,14 +235,12 @@ def update_store_demographics(start_date, end_date, timezone): @app.callback( Output("store-trips", "data"), - Input('date-picker', 'start_date'), - Input('date-picker', 'end_date'), + Input('date-picker', 'start_date'), # these are ISO strings + Input('date-picker', 'end_date'), # these are ISO strings Input('date-picker-timezone', 'value'), ) def update_store_trips(start_date, end_date, timezone): - # trim the time part, leaving only date as YYYY-MM-DD - start_date = start_date[:10] if start_date else None - end_date = end_date[:10] if end_date else None + (start_date, end_date) = 
iso_to_date_only(start_date, end_date) df = query_confirmed_trips(start_date, end_date, timezone) records = df.to_dict("records") # logging.debug("returning records %s" % records[0:2]) diff --git a/pages/data.py b/pages/data.py index ab3a543..995e784 100644 --- a/pages/data.py +++ b/pages/data.py @@ -12,6 +12,7 @@ from utils import permissions as perm_utils from utils import db_utils from utils.db_utils import query_trajectories +from utils.datetime_utils import iso_to_date_only register_page(__name__, path="/data") intro = """## Data""" @@ -96,7 +97,7 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj elif tab == 'tab-trajectories-datatable': # Currently store_trajectories data is loaded only when the respective tab is selected #Here we query for trajectory data once "Trajectories" tab is selected - start_date, end_date = start_date[:10], end_date[:10] # dates as YYYY-MM-DD + (start_date, end_date) = iso_to_date_only(start_date, end_date) if store_trajectories == {}: store_trajectories = update_store_trajectories(start_date, end_date, timezone) data = store_trajectories["data"] diff --git a/pages/home.py b/pages/home.py index 29e853d..c0b75f9 100644 --- a/pages/home.py +++ b/pages/home.py @@ -18,6 +18,7 @@ import emission.core.get_database as edb from utils.permissions import has_permission +from utils.datetime_utils import iso_to_date_only register_page(__name__, path="/") @@ -175,13 +176,13 @@ def generate_plot_sign_up_trend(store_uuids): @callback( Output('fig-trips-trend', 'figure'), Input('store-trips', 'data'), - Input('date-picker', 'start_date'), - Input('date-picker', 'end_date'), + Input('date-picker', 'start_date'), # these are ISO strings + Input('date-picker', 'end_date'), # these are ISO strings ) def generate_plot_trips_trend(store_trips, start_date, end_date): df = pd.DataFrame(store_trips.get("data")) trend_df = None - start_date, end_date = start_date[:10], end_date[:10] # dates as YYYY-MM-DD + (start_date, 
end_date) = iso_to_date_only(start_date, end_date) if not df.empty and has_permission('overview_trips_trend'): trend_df = compute_trips_trend(df, date_col = "trip_start_time_str") fig = generate_barplot(trend_df, x = 'date', y = 'count', title = f"Trips trend({start_date} to {end_date})") diff --git a/utils/datetime_utils.py b/utils/datetime_utils.py new file mode 100644 index 0000000..fa5cfd2 --- /dev/null +++ b/utils/datetime_utils.py @@ -0,0 +1,7 @@ +def iso_to_date_only(*iso_strs: str): + """ + For each ISO date string in the input, returns only the date part in the format 'YYYY-MM-DD' + e.g. '2021-01-01T00:00:00.000Z' -> '2021-01-01' + """ + return [iso_str[:10] if iso_str else None for iso_str in iso_strs] + \ No newline at end of file From 719199a1ca178e95a0ffb3439c79dd6b605cd000 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Sun, 4 Feb 2024 23:08:05 -0500 Subject: [PATCH 56/63] move iso_range_to_ts_range to datetime_utils.py This function is used by db_utils but might be better categorized in datetime_utils. Previously called`get_ts_range`, its new name is `iso_range_to_ts_range`, since I thought that was more descriptive. 
-- Also, I noticed `arrow` was imported in db_utils twice, so i fixed that while I was here --- utils/datetime_utils.py | 27 ++++++++++++++++++++++++++- utils/db_utils.py | 27 +++------------------------ 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/utils/datetime_utils.py b/utils/datetime_utils.py index fa5cfd2..b626191 100644 --- a/utils/datetime_utils.py +++ b/utils/datetime_utils.py @@ -1,7 +1,32 @@ +import arrow + +MAX_EPOCH_TIME = 2 ** 31 - 1 + + +def iso_range_to_ts_range(start_date: str, end_date: str, tz: str): + """ + Returns a tuple of (start_ts, end_ts) as epoch timestamps, given start_date and end_date in + ISO format and the timezone mode in which the dates should be resolved to timestamps ('utc' or 'local') + """ + start_ts, end_ts = None, MAX_EPOCH_TIME + if start_date is not None: + if tz == 'utc': + start_ts = arrow.get(start_date).timestamp() + elif tz == 'local': + start_ts = arrow.get(start_date, tzinfo='local').timestamp() + if end_date is not None: + if tz == 'utc': + end_ts = arrow.get(end_date).replace( + hour=23, minute=59, second=59).timestamp() + elif tz == 'local': + end_ts = arrow.get(end_date, tzinfo='local').replace( + hour=23, minute=59, second=59).timestamp() + return (start_ts, end_ts) + + def iso_to_date_only(*iso_strs: str): """ For each ISO date string in the input, returns only the date part in the format 'YYYY-MM-DD' e.g. 
'2021-01-01T00:00:00.000Z' -> '2021-01-01' """ return [iso_str[:10] if iso_str else None for iso_str in iso_strs] - \ No newline at end of file diff --git a/utils/db_utils.py b/utils/db_utils.py index 88899ee..6a71442 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -2,8 +2,6 @@ import arrow from uuid import UUID -import arrow - import pandas as pd import pymongo @@ -15,26 +13,7 @@ from utils import constants from utils import permissions as perm_utils - -MAX_EPOCH_TIME = 2 ** 31 - 1 - -def get_ts_range(start_date: str, end_date: str, tz: str): - """ - Returns a tuple of (start_ts, end_ts) as timestamps, given start_date and end_date in ISO format - and the timezone mode in which the dates should be resolved to timestamps ('utc' or 'local') - """ - start_ts, end_ts = None, MAX_EPOCH_TIME - if start_date is not None: - if tz == 'utc': - start_ts = arrow.get(start_date).timestamp() - elif tz == 'local': - start_ts = arrow.get(start_date, tzinfo='local').timestamp() - if end_date is not None: - if tz == 'utc': - end_ts = arrow.get(end_date).replace(hour=23, minute=59, second=59).timestamp() - elif tz == 'local': - end_ts = arrow.get(end_date, tzinfo='local').replace(hour=23, minute=59, second=59).timestamp() - return (start_ts, end_ts) +from utils.datetime_utils import iso_range_to_ts_range def query_uuids(start_date: str, end_date: str, tz: str): # As of now, time filtering does not apply to UUIDs; we just query all of them. 
@@ -74,7 +53,7 @@ def query_uuids(start_date: str, end_date: str, tz: str): return df def query_confirmed_trips(start_date: str, end_date: str, tz: str): - (start_ts, end_ts) = get_ts_range(start_date, end_date, tz) + (start_ts, end_ts) = iso_range_to_ts_range(start_date, end_date, tz) ts = esta.TimeSeries.get_aggregate_time_series() # Note to self, allow end_ts to also be null in the timequery # we can then remove the start_time, end_time logic @@ -157,7 +136,7 @@ def query_demographics(): def query_trajectories(start_date: str, end_date: str, tz: str): - (start_ts, end_ts) = get_ts_range(start_date, end_date, tz) + (start_ts, end_ts) = iso_range_to_ts_range(start_date, end_date, tz) ts = esta.TimeSeries.get_aggregate_time_series() entries = ts.find_entries( key_list=["analysis/recreated_location"], From 95ded215f73bde8332fa58d173613f34228629cc Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Mon, 5 Feb 2024 11:23:29 -0500 Subject: [PATCH 57/63] organize filtering options into a collapsible menu --- app_sidebar_collapsible.py | 116 +++++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 43 deletions(-) diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index 29685f0..a3276ff 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -132,49 +132,65 @@ def make_controls(): last_week_date = arrow.now().shift(days=-7).format('YYYY-MM-DD') tomorrow_date = arrow.now().shift(days=1).format('YYYY-MM-DD') return html.Div([ - # Global Date Picker - dcc.DatePickerRange( - id='date-picker', - display_format='D MMM Y', - start_date=last_week_date, - end_date=today_date, - min_date_allowed='2010-1-1', - max_date_allowed=tomorrow_date, - initial_visible_month=today_date, - ), - html.Div([ - html.Span('Query trips using: ', style={'margin-right': '10px'}), - dcc.Dropdown( - id='date-picker-timezone', - options=[ - {'label': 'UTC Time', 'value': 'utc'}, - {'label': 'My Local Timezone', 'value': 'local'}, - # {'label': 'Local 
Timezone of Trips', 'value': 'trips'}, - ], - value='utc', - clearable=False, - searchable=False, - style={'width': '220px'}, - ), - ], - style={'margin': '10px 10px 0 0', - 'display': 'flex', - 'justify-content': 'right', - 'align-items': 'center'}, - ), - dcc.Checklist( - id='global-filters', - options=[ - {'label': 'Exclude "test" users', 'value': 'exclude-test-users'}, - ], - value=['exclude-test-users'], - ), - ], - style={'margin': '10px 10px 0 0', - 'display': 'flex', - 'flex-direction': 'column', - 'align-items': 'end'} - ) + html.Div([ + # Global Date Picker + dcc.DatePickerRange( + id='date-picker', + display_format='D MMM Y', + start_date=last_week_date, + end_date=today_date, + min_date_allowed='2010-1-1', + max_date_allowed=tomorrow_date, + initial_visible_month=today_date, + ), + dbc.Button( + html.I(className="fas fa-bars", id='collapse-icon'), + outline=True, + id="collapse-button", + n_clicks=0, + style={'color': '#444', 'border': '1px solid #dbdbdb', + 'border-radius': '3px', 'margin-left': '3px'} + ), + ], + style={'display': 'flex'}, + ), + dbc.Collapse([ + html.Div([ + html.Span('Query trips using: ', style={'margin-right': '10px'}), + dcc.Dropdown( + id='date-picker-timezone', + options=[ + {'label': 'UTC Time', 'value': 'utc'}, + {'label': 'My Local Timezone', 'value': 'local'}, + # {'label': 'Local Timezone of Trips', 'value': 'trips'}, + ], + value='utc', + clearable=False, + searchable=False, + style={'width': '180px'}, + )] + ), + + dcc.Checklist( + id='global-filters', + options=[ + {'label': 'Exclude "test" users', + 'value': 'exclude-test-users'}, + ], + value=['exclude-test-users'], + style={'margin-top': '10px'}, + ), + ], + id='collapse-filters', + is_open=False, + style={'padding': '5px 15px 10px', 'border': '1px solid #dbdbdb', 'border-top': '0'} + ), + ], + style={'margin': '10px 10px 0 auto', + 'width': 'fit-content', + 'display': 'flex', + 'flex-direction': 'column'} + ) page_content = dcc.Loading( type='default', @@ -204,6 
+220,20 @@ def make_layout(): return html.Div([ ]) app.layout = make_layout +# make the 'filters' menu collapsible +@app.callback( + Output("collapse-filters", "is_open"), + Output("collapse-icon", "className"), + [Input("collapse-button", "n_clicks")], + [Input("collapse-filters", "is_open")], +) +def toggle_collapse_filters(n, is_open): + if not n: return (is_open, "fas fa-bars") + if is_open: + return (False, "fas fa-bars") + else: + return (True, "fas fa-chevron-up") + # Load data stores @app.callback( Output("store-uuids", "data"), From d00e42a85f0bb41eb886ca91dae9b356d5fc7e19 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Thu, 8 Feb 2024 16:36:37 -0800 Subject: [PATCH 58/63] Remove the scatterplot inside heatmap --- pages/map.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pages/map.py b/pages/map.py index de3ce58..fda82f1 100644 --- a/pages/map.py +++ b/pages/map.py @@ -84,18 +84,18 @@ def create_heatmap_fig(coordinates): ) ) - fig.add_trace( - go.Scattermapbox( - lat=coordinates['lat'], - lon=coordinates['lon'], - mode='markers', - marker=go.scattermapbox.Marker( - size=9, - color=coordinates['color'], - ), - name = '', - ) - ) + # fig.add_trace( + # go.Scattermapbox( + # lat=coordinates['lat'], + # lon=coordinates['lon'], + # mode='markers', + # marker=go.scattermapbox.Marker( + # size=9, + # color=coordinates['color'], + # ), + # name = '', + # ) + # ) fig.update_layout( mapbox_style='open-street-map', From 2db052e29f771eb03918be26ed43c51128b8e7ce Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Thu, 8 Feb 2024 16:39:49 -0800 Subject: [PATCH 59/63] Remove the scatterplot inside heatmap --- pages/map.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/pages/map.py b/pages/map.py index fda82f1..2f62cc7 100644 --- a/pages/map.py +++ b/pages/map.py @@ -83,20 +83,6 @@ def 
create_heatmap_fig(coordinates): ) ) - - # fig.add_trace( - # go.Scattermapbox( - # lat=coordinates['lat'], - # lon=coordinates['lon'], - # mode='markers', - # marker=go.scattermapbox.Marker( - # size=9, - # color=coordinates['color'], - # ), - # name = '', - # ) - # ) - fig.update_layout( mapbox_style='open-street-map', mapbox_center_lon=coordinates['lon'][0], From 9b2a785d53614b395ec5230d55f192de43f35f6d Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Sun, 11 Feb 2024 18:34:38 -0800 Subject: [PATCH 60/63] Support both humanized and raw values in trips table. (#97) * Support both humanized and raw values in trips table Changes: - Introduce a dropdown option in the trip table, enabling user to select raw value columns - The selected column is dynamically displayed in the trip table * Remove INITIAL_TRIP_COLS and adjust code accordingly. * Removed Toggle Columns button * update column name and table ids - column name now have their units labeled. * Add button to switch between raw columns and humanized columns. - Humanized columns will be displayed initially - Allow user to switch between humanized and raw columns * Add comments to modified and added lines of code * Update unique id for each datatable. * Add common id to all the table on data page. * Remove unnecessary lines of code. * Provide table id for trips table only * Add state to update button-label * Remove extraneous whitespace changes * More whitespace changes to fix the initial gap * Final final whitespace change Because one of the lines had an initial tab * Trying to fix whitespace by coping from original file * Ok so this should work --------- Co-authored-by: K. 
Shankari --- pages/data.py | 51 ++++++++++++++++++++++++++++++++++++++++------ utils/constants.py | 4 ++++ utils/db_utils.py | 6 ++++-- 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/pages/data.py b/pages/data.py index 995e784..a055c4d 100644 --- a/pages/data.py +++ b/pages/data.py @@ -3,12 +3,13 @@ Since the dcc.Location component is not in the layout when navigating to this page, it triggers the callback. The workaround is to check if the input value is None. """ -from dash import dcc, html, Input, Output, callback, register_page, dash_table +from dash import dcc, html, Input, Output, callback, register_page, dash_table, State # Etc import logging import pandas as pd from dash.exceptions import PreventUpdate +from utils import constants from utils import permissions as perm_utils from utils import db_utils from utils.db_utils import query_trajectories @@ -75,6 +76,25 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj col['label'] for col in perm_utils.get_allowed_named_trip_columns() ) has_perm = perm_utils.has_permission('data_trips') + df = pd.DataFrame(data) + if df.empty or not has_perm: + return None + + df = df.drop(columns=[col for col in df.columns if col not in columns]) + df = clean_location_data(df) + + trips_table = populate_datatable(df,'trips-table') + #Return an HTML Div containing a button (button-clicked) and the populated datatable + return html.Div([ + html.Button( + 'Display columns with raw units', + id='button-clicked', #identifier for the button + n_clicks=0, #initialize number of clicks to 0 + style={'marginLeft':'5px'} + ), + trips_table, #populated trips table component + ]) + elif tab == 'tab-demographics-datatable': data = store_demographics["data"] has_perm = perm_utils.has_permission('data_demographics') @@ -111,7 +131,6 @@ def render_content(tab, store_uuids, store_trips, store_demographics, store_traj return None df = df.drop(columns=[col for col in df.columns if col not in columns]) 
- df = clean_location_data(df) return populate_datatable(df) @@ -136,12 +155,31 @@ def update_sub_tab(tab, store_demographics): df = df.drop(columns=[col for col in df.columns if col not in columns]) return populate_datatable(df) - -def populate_datatable(df): + + +@callback( + Output('trips-table', 'hidden_columns'), # Output hidden columns in the trips-table + Output('button-clicked', 'children'), #updates button label + Input('button-clicked', 'n_clicks'), #number of clicks on the button + State('button-clicked', 'children') #State representing the current label of button +) +#Controls visibility of columns in trips table and updates the label of button based on the number of clicks. +def update_dropdowns_trips(n_clicks, button_label): + if n_clicks % 2 == 0: + hidden_col = ["data.duration_seconds", "data.distance_meters","data.distance"] + button_label = 'Display columns with raw units' + else: + hidden_col = ["data.duration", "data.distance_miles", "data.distance_km", "data.distance"] + button_label = 'Display columns with humanzied units' + #return the list of hidden columns and the updated button label + return hidden_col, button_label + + +def populate_datatable(df, table_id=''): if not isinstance(df, pd.DataFrame): raise PreventUpdate return dash_table.DataTable( - # id='my-table', + id= table_id, # columns=[{"name": i, "id": i} for i in df.columns], data=df.to_dict('records'), export_format="csv", @@ -157,5 +195,6 @@ def populate_datatable(df): # 'width': '100px', # 'maxWidth': '100px', }, - style_table={'overflowX': 'auto'} + style_table={'overflowX': 'auto'}, + css=[{"selector":".show-hide", "rule":"display:none"}] ) diff --git a/utils/constants.py b/utils/constants.py index 2b40c81..3d53363 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -17,7 +17,11 @@ "data.end_local_dt", "data.end_fmt_time", "data.duration", + "data.duration_seconds", "data.distance", + "data.distance_km", + "data.distance_miles", + "data.distance_meters", 
"data.start_loc.coordinates", "data.end_loc.coordinates", "user_id" diff --git a/utils/db_utils.py b/utils/db_utils.py index 6a71442..a23dec7 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -82,14 +82,16 @@ def query_confirmed_trips(start_date: str, end_date: str, tz: str): # https://github.com/e-mission/op-admin-dashboard/issues/29#issuecomment-1530105040 # https://github.com/e-mission/op-admin-dashboard/issues/29#issuecomment-1530439811 # so just replacing the distance and duration with the humanized values for now + df['data.distance_meters'] = df['data.distance'] use_imperial = perm_utils.config.get("display_config", {"use_imperial": False}).get("use_imperial", False) # convert to km to humanize - df['data.distance'] = df['data.distance'] / 1000 + df['data.distance_km'] = df['data.distance'] / 1000 # convert km further to miles because this is the US, Liberia or Myanmar # https://en.wikipedia.org/wiki/Mile + df['data.duration_seconds'] = df['data.duration'] if use_imperial: - df['data.distance'] = df['data.distance'] * 0.6213712 + df['data.distance_miles'] = df['data.distance_km'] * 0.6213712 df['data.duration'] = df['data.duration'].apply(lambda d: arrow.utcnow().shift(seconds=d).humanize(only_distance=True)) From 1374778148514796ab86a95485f40c79a03c7303 Mon Sep 17 00:00:00 2001 From: Jack Greenlee Date: Tue, 13 Feb 2024 02:03:40 -0500 Subject: [PATCH 61/63] add comment about falsy [] https://github.com/e-mission/op-admin-dashboard/pull/98#discussion_r1480803263 --- utils/db_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/db_utils.py b/utils/db_utils.py index ac83508..3b25d53 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -21,7 +21,7 @@ def df_to_filtered_records(df, col_to_filter=None, vals_to_exclude: list[str] = and a list of values that rows in that column will be excluded if they match """ if df.empty: return [] - if col_to_filter and vals_to_exclude: + if col_to_filter and vals_to_exclude: # will only 
filter if both are not None or [] df = df[~df[col_to_filter].isin(vals_to_exclude)] return df.to_dict("records") From 2ac9e3a4f8006f319e419f39bd9fa4b3ac2e3409 Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Thu, 15 Feb 2024 22:14:45 -0800 Subject: [PATCH 62/63] Bump up e-mission-server image --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 3e37b1d..2c564f3 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM shankari/e-mission-server:master_2023-12-22--17-36 +FROM shankari/e-mission-server:master_2024-02-10--19-38 ENV DASH_DEBUG_MODE True ENV SERVER_PORT 8050 From ee4b358cf2bbccdf54efb2838f0cebfbc6682341 Mon Sep 17 00:00:00 2001 From: Asmita Acharya <79387860+achasmita@users.noreply.github.com> Date: Tue, 20 Feb 2024 09:21:03 -0800 Subject: [PATCH 63/63] Uncomment logic to send push notifications. --- pages/push_notification.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pages/push_notification.py b/pages/push_notification.py index dc03219..57c954f 100644 --- a/pages/push_notification.py +++ b/pages/push_notification.py @@ -190,12 +190,13 @@ def send_push_notification( send_n_clicks, log, query_spec, emails, uuids, log_o logs.append("dry run, skipping actual push") return "\n".join(logs), 0 else: + response = pnu.send_visible_notification_to_users( + uuid_list, + title, + message, + survey_spec, + ) + pnu.display_response(response) + logs.append("Push notification sent successfully") return "\n".join(logs), 0 - # response = pnu.send_visible_notification_to_users( - # uuid_list, - # title, - # message, - # survey_spec, - # ) - # pnu.display_response(response) return log, 0