From 85a2fad08ff1ab13e9b87d9b940ef0e5779f22e4 Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Mon, 16 Sep 2024 22:11:00 -0700 Subject: [PATCH 1/2] Reduced Time by Half --- pages/home.py | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/pages/home.py b/pages/home.py index c0b75f9..5a697fa 100644 --- a/pages/home.py +++ b/pages/home.py @@ -13,6 +13,9 @@ # Etc import pandas as pd import arrow +import time +import logging +from concurrent.futures import ThreadPoolExecutor # e-mission modules import emission.core.get_database as edb @@ -123,8 +126,12 @@ def generate_card(title_text, body_text, icon): Input('store-uuids', 'data'), ) def update_card_users(store_uuids): + start_time = time.time() number_of_users = store_uuids.get('length') if has_permission('overview_users') else 0 card = generate_card("# Users", f"{number_of_users} users", "fa fa-users") + end_time = time.time() # End timing + execution_time = end_time - start_time + logging.debug(f'Time taken to update card users: {execution_time:.4f} seconds') return card @@ -133,12 +140,28 @@ def update_card_users(store_uuids): Input('store-uuids', 'data'), ) def update_card_active_users(store_uuids): + start_time = time.time() + + # Convert the UUIDs into a DataFrame uuid_df = pd.DataFrame(store_uuids.get('data')) number_of_active_users = 0 + if not uuid_df.empty and has_permission('overview_active_users'): one_day = 24 * 60 * 60 - number_of_active_users = get_number_of_active_users(uuid_df['user_id'], one_day) + + # Parallelize the call to get the number of active users + with ThreadPoolExecutor() as executor: + future = executor.submit(get_number_of_active_users, uuid_df['user_id'], one_day) + number_of_active_users = future.result() + + # Generate the card card = generate_card("# Active users", f"{number_of_active_users} users", "fa fa-person-walking") + + # End timing + end_time = time.time() + execution_time = end_time - start_time + logging.debug(f'Time taken to update card active users: {execution_time:.4f} seconds') + return card @@ -147,8 +170,12 @@ def update_card_active_users(store_uuids): Input('store-trips', 'data'), ) def update_card_trips(store_trips): + start_time = time.time() number_of_trips = store_trips.get('length') if has_permission('overview_trips') else 0 card = generate_card("# Confirmed trips", f"{number_of_trips} trips", "fa fa-angles-right") + end_time = time.time() # End timing + execution_time = end_time - start_time + logging.debug(f'Time taken to update card trips: {execution_time:.4f} seconds') return card @@ -165,11 +192,15 @@ def generate_barplot(data, x, y, title): Input('store-uuids', 'data'), ) def generate_plot_sign_up_trend(store_uuids): + start_time = time.time() df = pd.DataFrame(store_uuids.get("data")) trend_df = None if not df.empty and has_permission('overview_signup_trends'): trend_df = compute_sign_up_trend(df) fig = generate_barplot(trend_df, x = 'date', y = 'count', title = "Sign-ups trend") + end_time = time.time() # End timing + execution_time = end_time - start_time + logging.debug(f'Time taken to generate plot sign up trend: {execution_time:.4f} seconds') return fig @@ -180,10 +211,14 @@ def generate_plot_sign_up_trend(store_uuids): Input('date-picker', 'end_date'), # these are ISO strings ) def generate_plot_trips_trend(store_trips, start_date, end_date): + start_time = time.time() df = pd.DataFrame(store_trips.get("data")) trend_df = None (start_date, end_date) = iso_to_date_only(start_date, end_date) if not df.empty and has_permission('overview_trips_trend'): trend_df = compute_trips_trend(df, date_col = "trip_start_time_str") fig = generate_barplot(trend_df, x = 'date', y = 'count', title = f"Trips trend({start_date} to {end_date})") + end_time = time.time() # End timing + execution_time = end_time - start_time + logging.debug(f'Time taken to generate plot trips trend: {execution_time:.4f} seconds') return fig From 9c143cbb5379aa90d8b25311be00147f57872ce9 Mon Sep 17 00:00:00 2001 From: TeachMeTW Date: Tue, 8 Oct 2024 20:14:08 -0700 Subject: [PATCH 2/2] Increased Speed --- pages/home.py | 155 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 102 insertions(+), 53 deletions(-) diff --git a/pages/home.py b/pages/home.py index c0b75f9..0230e1f 100644 --- a/pages/home.py +++ b/pages/home.py @@ -1,25 +1,42 @@ -""" -Note that the callback will trigger even if prevent_initial_call=True. This is because dcc.Location must -be in app.py. Since the dcc.Location component is not in the layout when navigating to this page, it triggers the callback. -The workaround is to check if the input value is None. - -""" from uuid import UUID from dash import dcc, html, Input, Output, callback, register_page import dash_bootstrap_components as dbc - import plotly.express as px - -# Etc import pandas as pd import arrow +import logging +import time +from functools import wraps # e-mission modules import emission.core.get_database as edb - from utils.permissions import has_permission from utils.datetime_utils import iso_to_date_only +# Configure logging +logging.basicConfig( + level=logging.DEBUG, # Set to DEBUG to capture all levels of log messages + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.StreamHandler() # Logs will be output to the console + ] +) +logger = logging.getLogger(__name__) + +def log_execution_time(func): + @wraps(func) + def wrapper(*args, **kwargs): + logger.debug(f"Starting '{func.__name__}'") + start_time = time.perf_counter() + try: + result = func(*args, **kwargs) + return result + finally: + end_time = time.perf_counter() + elapsed_time = end_time - start_time + logger.debug(f"Finished '{func.__name__}' in {elapsed_time:.4f} seconds") + return wrapper + register_page(__name__, path="/") intro = "## Home" @@ -50,7 +67,7 @@ ] ) - +@log_execution_time def compute_sign_up_trend(uuid_df): uuid_df['update_ts'] = pd.to_datetime(uuid_df['update_ts'], utc=True) res_df = ( @@ -62,7 +79,7 @@ def compute_sign_up_trend(uuid_df): ) return res_df - +@log_execution_time def compute_trips_trend(trips_df, date_col): trips_df[date_col] = pd.to_datetime(trips_df[date_col], utc=True) trips_df[date_col] = pd.DatetimeIndex(trips_df[date_col]).date @@ -75,18 +92,41 @@ def compute_trips_trend(trips_df, date_col): ) return res_df - +@log_execution_time def find_last_get(uuid_list): - uuid_list = [UUID(npu) for npu in uuid_list] - last_item = list(edb.get_timeseries_db().aggregate([ - {'$match': {'user_id': {'$in': uuid_list}}}, - {'$match': {'metadata.key': 'stats/server_api_time'}}, - {'$match': {'data.name': 'POST_/usercache/get'}}, - {'$group': {'_id': '$user_id', 'write_ts': {'$max': '$metadata.write_ts'}}}, - ])) - return last_item + + # Do we really need this? + # Looks like this takes the most time + # uuid_list = [UUID(npu) for npu in uuid_list] + + if isinstance(uuid_list, pd.Series): + uuid_list = uuid_list.tolist() + + # Combined $match stages + pipeline = [ + { + '$match': { + 'user_id': {'$in': uuid_list}, + 'metadata.key': 'stats/server_api_time', + 'data.name': 'POST_/usercache/get' + } + }, + { + '$group': { + '_id': '$user_id', + 'write_ts': {'$max': '$metadata.write_ts'} + } + } + ] + + + # maybe try profiling + last_items = list(edb.get_timeseries_db().aggregate(pipeline)) + + return last_items +@log_execution_time def get_number_of_active_users(uuid_list, threshold): last_get_entries = find_last_get(uuid_list) number_of_active_users = 0 @@ -98,41 +138,51 @@ def get_number_of_active_users(uuid_list, threshold): number_of_active_users += 1 return number_of_active_users - +@log_execution_time def generate_card(title_text, body_text, icon): card = dbc.CardGroup([ - dbc.Card( - dbc.CardBody( - [ - html.H5(title_text, className="card-title"), - html.P(body_text, className="card-text",), - ] - ) - ), - dbc.Card( - html.Div(className=icon, style=card_icon), - className="bg-primary", - style={"maxWidth": 75}, - ), - ]) + dbc.Card( + dbc.CardBody( + [ + html.H5(title_text, className="card-title"), + html.P(body_text, className="card-text"), + ] + ) + ), + dbc.Card( + html.Div(className=icon, style=card_icon), + className="bg-primary", + style={"maxWidth": 75}, + ), + ]) return card +@log_execution_time +def generate_barplot(data, x, y, title): + fig = px.bar() + if data is not None: + fig = px.bar(data, x=x, y=y) + fig.update_layout(title=title) + return fig @callback( Output('card-users', 'children'), Input('store-uuids', 'data'), ) +@log_execution_time def update_card_users(store_uuids): + logger.debug("Callback 'update_card_users' triggered") number_of_users = store_uuids.get('length') if has_permission('overview_users') else 0 card = generate_card("# Users", f"{number_of_users} users", "fa fa-users") return card - @callback( Output('card-active-users', 'children'), Input('store-uuids', 'data'), ) +@log_execution_time def update_card_active_users(store_uuids): + logger.debug("Callback 'update_card_active_users' triggered") uuid_df = pd.DataFrame(store_uuids.get('data')) number_of_active_users = 0 if not uuid_df.empty and has_permission('overview_active_users'): @@ -141,49 +191,48 @@ def update_card_active_users(store_uuids): card = generate_card("# Active users", f"{number_of_active_users} users", "fa fa-person-walking") return card - @callback( Output('card-trips', 'children'), Input('store-trips', 'data'), ) +@log_execution_time def update_card_trips(store_trips): + logger.debug("Callback 'update_card_trips' triggered") number_of_trips = store_trips.get('length') if has_permission('overview_trips') else 0 card = generate_card("# Confirmed trips", f"{number_of_trips} trips", "fa fa-angles-right") return card - -def generate_barplot(data, x, y, title): - fig = px.bar() - if data is not None: - fig = px.bar(data, x=x, y=y) - fig.update_layout(title=title) - return fig - - @callback( Output('fig-sign-up-trend', 'figure'), Input('store-uuids', 'data'), ) +@log_execution_time def generate_plot_sign_up_trend(store_uuids): + logger.debug("Callback 'generate_plot_sign_up_trend' triggered") df = pd.DataFrame(store_uuids.get("data")) trend_df = None if not df.empty and has_permission('overview_signup_trends'): trend_df = compute_sign_up_trend(df) - fig = generate_barplot(trend_df, x = 'date', y = 'count', title = "Sign-ups trend") + fig = generate_barplot(trend_df, x='date', y='count', title="Sign-ups trend") return fig - @callback( Output('fig-trips-trend', 'figure'), Input('store-trips', 'data'), - Input('date-picker', 'start_date'), # these are ISO strings - Input('date-picker', 'end_date'), # these are ISO strings + Input('date-picker', 'start_date'), # these are ISO strings + Input('date-picker', 'end_date'), # these are ISO strings ) +@log_execution_time def generate_plot_trips_trend(store_trips, start_date, end_date): + if store_trips is None: + logger.debug("Callback 'generate_plot_trips_trend' triggered with store_trips=None") + return px.bar() # Return an empty figure or a placeholder + + logger.debug("Callback 'generate_plot_trips_trend' triggered with valid inputs") df = pd.DataFrame(store_trips.get("data")) trend_df = None (start_date, end_date) = iso_to_date_only(start_date, end_date) if not df.empty and has_permission('overview_trips_trend'): - trend_df = compute_trips_trend(df, date_col = "trip_start_time_str") - fig = generate_barplot(trend_df, x = 'date', y = 'count', title = f"Trips trend({start_date} to {end_date})") + trend_df = compute_trips_trend(df, date_col="trip_start_time_str") + fig = generate_barplot(trend_df, x='date', y='count', title=f"Trips trend({start_date} to {end_date})") return fig