Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance Improvements in Home Page Processing #126

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 102 additions & 53 deletions pages/home.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,42 @@
"""
Note that the callback will trigger even if prevent_initial_call=True. This is because dcc.Location must
be in app.py. Since the dcc.Location component is not in the layout when navigating to this page, it triggers the callback.
The workaround is to check if the input value is None.

"""
from uuid import UUID
from dash import dcc, html, Input, Output, callback, register_page
import dash_bootstrap_components as dbc

import plotly.express as px

# Etc
import pandas as pd
import arrow
import logging
import time
from functools import wraps

# e-mission modules
import emission.core.get_database as edb

from utils.permissions import has_permission
from utils.datetime_utils import iso_to_date_only

# Configure logging for this module.
# NOTE(review): basicConfig() targets the root logger and runs when this module is
# imported; confirm it does not conflict with logging set up by the app entry point.
logging.basicConfig(
level=logging.DEBUG, # DEBUG is the lowest standard level, so every standard-level message is captured
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.StreamHandler() # Stream handler with no explicit stream: logs go to the console
]
)
# Module-level logger, named after this module, used by the functions below.
logger = logging.getLogger(__name__)

def log_execution_time(func):
    """Decorate ``func`` so that each call logs its start and its duration.

    Both messages are emitted at DEBUG level on the module logger. The
    duration is measured with ``time.perf_counter`` and logged from a
    ``finally`` clause, so the "Finished" message is written even when
    ``func`` raises; the exception itself still propagates to the caller.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature and return value as ``func``.
    """
    # Some callables (e.g. functools.partial objects) have no __name__;
    # resolve the label once here instead of failing on every call.
    func_name = getattr(func, "__name__", None) or repr(func)

    @wraps(func)
    def wrapper(*args, **kwargs):
        logger.debug("Starting '%s'", func_name)
        start_time = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            elapsed_time = time.perf_counter() - start_time
            logger.debug("Finished '%s' in %.4f seconds", func_name, elapsed_time)

    return wrapper

# Register this module as a Dash page served at the root path "/".
register_page(__name__, path="/")

# Heading text for the page; the "## " prefix looks like a Markdown level-2 heading
# (presumably rendered by the layout, which is not fully visible here).
intro = "## Home"
Expand Down Expand Up @@ -50,7 +67,7 @@
]
)


@log_execution_time
def compute_sign_up_trend(uuid_df):
uuid_df['update_ts'] = pd.to_datetime(uuid_df['update_ts'], utc=True)
res_df = (
Expand All @@ -62,7 +79,7 @@ def compute_sign_up_trend(uuid_df):
)
return res_df


@log_execution_time
def compute_trips_trend(trips_df, date_col):
trips_df[date_col] = pd.to_datetime(trips_df[date_col], utc=True)
trips_df[date_col] = pd.DatetimeIndex(trips_df[date_col]).date
Expand All @@ -75,18 +92,41 @@ def compute_trips_trend(trips_df, date_col):
)
return res_df


@log_execution_time
def find_last_get(uuid_list):
    """Return the latest ``POST_/usercache/get`` write timestamp per user.

    Args:
        uuid_list: Iterable of user ids. Each entry may be a ``UUID``
            instance or a string accepted by ``UUID(...)``.

    Returns:
        A list of documents of the form
        ``{'_id': <user_id>, 'write_ts': <max metadata.write_ts>}``, one per
        user that has at least one matching entry.

    Raises:
        ValueError: If a string entry is not a valid UUID.
    """
    # Convert string ids to UUID objects for the query; entries that are
    # already UUID instances are passed through unchanged.
    user_ids = [uid if isinstance(uid, UUID) else UUID(uid) for uid in uuid_list]
    if not user_ids:
        # `$in: []` can never match, so skip the database round trip.
        return []

    pipeline = [
        # One $match with all three filters; equivalent to the three
        # consecutive $match stages it replaces.
        {
            '$match': {
                'user_id': {'$in': user_ids},
                'metadata.key': 'stats/server_api_time',
                'data.name': 'POST_/usercache/get',
            }
        },
        # Keep only the newest write timestamp for each user.
        {'$group': {'_id': '$user_id', 'write_ts': {'$max': '$metadata.write_ts'}}},
    ]
    return list(edb.get_timeseries_db().aggregate(pipeline))

# TODO(review): confirm whether the UUID conversion below is still needed.
# Timing suggests it is the slowest step in this function.
# uuid_list = [UUID(npu) for npu in uuid_list]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's odd that this would be the bottleneck.
Are the entries of uuid_list already instances of UUID? Or are they strings?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JGreenlee the entries of uuid_list are already strings; I don't see why we need to convert them to UUIDs when we are only using them in a query.


# Revised body of find_last_get (its `def` line sits above the review thread).
# A pandas Series of ids is turned into a plain list before it is placed in the query.
if isinstance(uuid_list, pd.Series):
uuid_list = uuid_list.tolist()

# NOTE(review): the ids are passed to `$in` exactly as received, whereas the earlier
# body converted every entry with UUID(...) first. If `user_id` is stored as a UUID
# value, string ids would match nothing -- TODO confirm before dropping the conversion.
# Single $match stage combining the user, key and name filters.
pipeline = [
{
'$match': {
'user_id': {'$in': uuid_list},
'metadata.key': 'stats/server_api_time',
'data.name': 'POST_/usercache/get'
}
},
{
# One output document per user, carrying the largest metadata.write_ts seen.
'$group': {
'_id': '$user_id',
'write_ts': {'$max': '$metadata.write_ts'}
}
}
]


# Run the aggregation and materialise the results into a list.
# TODO: profile this call if it remains slow.
last_items = list(edb.get_timeseries_db().aggregate(pipeline))

return last_items


@log_execution_time
def get_number_of_active_users(uuid_list, threshold):
last_get_entries = find_last_get(uuid_list)
number_of_active_users = 0
Expand All @@ -98,41 +138,51 @@ def get_number_of_active_users(uuid_list, threshold):
number_of_active_users += 1
return number_of_active_users


@log_execution_time
def generate_card(title_text, body_text, icon):
    """Build a summary card group: a text card followed by an icon card.

    Args:
        title_text: Heading placed in the text card (``html.H5``).
        body_text: Text placed under the heading (``html.P``).
        icon: CSS class name(s) applied to the icon ``html.Div``.

    Returns:
        A ``dbc.CardGroup`` containing the text card and the icon card.
    """
    text_card = dbc.Card(
        dbc.CardBody(
            [
                html.H5(title_text, className="card-title"),
                html.P(body_text, className="card-text"),
            ]
        )
    )
    icon_card = dbc.Card(
        # card_icon is defined elsewhere in this module (not visible in this block).
        html.Div(className=icon, style=card_icon),
        className="bg-primary",
        style={"maxWidth": 75},
    )
    return dbc.CardGroup([text_card, icon_card])

@log_execution_time
def generate_barplot(data, x, y, title):
    """Return a bar figure titled ``title``.

    When ``data`` is None the figure is an empty ``px.bar()``; otherwise it
    plots column ``y`` against column ``x`` of ``data``.
    """
    if data is None:
        figure = px.bar()
    else:
        figure = px.bar(data, x=x, y=y)
    figure.update_layout(title=title)
    return figure

@callback(
    Output('card-users', 'children'),
    Input('store-uuids', 'data'),
)
@log_execution_time
def update_card_users(store_uuids):
    """Render the "# Users" card from the ``store-uuids`` data.

    The count is the store's ``'length'`` entry. It falls back to 0 when the
    caller lacks the ``overview_users`` permission or when the callback fires
    with no store data (``None``).
    """
    logger.debug("Callback 'update_card_users' triggered")
    number_of_users = 0
    # The callback can be triggered before the store holds data, so guard
    # against None instead of calling .get() on it.
    if has_permission('overview_users') and store_uuids is not None:
        number_of_users = store_uuids.get('length')
    return generate_card("# Users", f"{number_of_users} users", "fa fa-users")


@callback(
Output('card-active-users', 'children'),
Input('store-uuids', 'data'),
)
@log_execution_time
def update_card_active_users(store_uuids):
logger.debug("Callback 'update_card_active_users' triggered")
uuid_df = pd.DataFrame(store_uuids.get('data'))
number_of_active_users = 0
if not uuid_df.empty and has_permission('overview_active_users'):
Expand All @@ -141,49 +191,48 @@ def update_card_active_users(store_uuids):
card = generate_card("# Active users", f"{number_of_active_users} users", "fa fa-person-walking")
return card


@callback(
    Output('card-trips', 'children'),
    Input('store-trips', 'data'),
)
@log_execution_time
def update_card_trips(store_trips):
    """Render the "# Confirmed trips" card from the ``store-trips`` data.

    The count is the store's ``'length'`` entry. It falls back to 0 when the
    caller lacks the ``overview_trips`` permission or when the callback fires
    with no store data (``None``).
    """
    logger.debug("Callback 'update_card_trips' triggered")
    number_of_trips = 0
    # The callback can be triggered before the store holds data, so guard
    # against None instead of calling .get() on it.
    if has_permission('overview_trips') and store_trips is not None:
        number_of_trips = store_trips.get('length')
    return generate_card("# Confirmed trips", f"{number_of_trips} trips", "fa fa-angles-right")


# NOTE(review): this is a second, undecorated definition of generate_barplot with the
# same body as the @log_execution_time-decorated one earlier in this file. A later
# `def` rebinds the name in Python, so confirm this copy is meant to be removed.
def generate_barplot(data, x, y, title):
# Start from an empty bar figure so a figure is returned even when there is no data.
fig = px.bar()
if data is not None:
fig = px.bar(data, x=x, y=y)
fig.update_layout(title=title)
return fig


@callback(
    Output('fig-sign-up-trend', 'figure'),
    Input('store-uuids', 'data'),
)
@log_execution_time
def generate_plot_sign_up_trend(store_uuids):
    """Build the "Sign-ups trend" bar figure from the ``store-uuids`` data.

    The trend is computed only when the store holds rows and the caller has
    the ``overview_signup_trends`` permission; otherwise an empty figure with
    the same title is returned.
    """
    logger.debug("Callback 'generate_plot_sign_up_trend' triggered")
    # The callback can fire with no store data; treat that as "no rows"
    # rather than calling .get() on None.
    records = store_uuids.get("data") if store_uuids is not None else None
    df = pd.DataFrame(records)
    trend_df = None
    if not df.empty and has_permission('overview_signup_trends'):
        trend_df = compute_sign_up_trend(df)
    return generate_barplot(trend_df, x='date', y='count', title="Sign-ups trend")


@callback(
    Output('fig-trips-trend', 'figure'),
    Input('store-trips', 'data'),
    Input('date-picker', 'start_date'),  # these are ISO strings
    Input('date-picker', 'end_date'),  # these are ISO strings
)
@log_execution_time
def generate_plot_trips_trend(store_trips, start_date, end_date):
    """Build the trips-trend bar figure for the selected date range.

    Args:
        store_trips: Contents of the ``store-trips`` store, or ``None`` when
            the callback fires without data.
        start_date: ISO string from the date picker.
        end_date: ISO string from the date picker.

    Returns:
        A bar figure. It is an untitled empty figure when ``store_trips`` is
        ``None``, and a titled figure (empty if there are no rows or the
        caller lacks ``overview_trips_trend``) otherwise.
    """
    if store_trips is None:
        logger.debug("Callback 'generate_plot_trips_trend' triggered with store_trips=None")
        return px.bar()  # Return an empty figure or a placeholder

    logger.debug("Callback 'generate_plot_trips_trend' triggered with valid inputs")
    df = pd.DataFrame(store_trips.get("data"))
    trend_df = None
    # The converted dates are only used in the figure title below.
    (start_date, end_date) = iso_to_date_only(start_date, end_date)
    if not df.empty and has_permission('overview_trips_trend'):
        trend_df = compute_trips_trend(df, date_col="trip_start_time_str")
    return generate_barplot(
        trend_df,
        x='date',
        y='count',
        title=f"Trips trend({start_date} to {end_date})",
    )