From 186fc737123d68cc88e09c5636b91035e73dc8be Mon Sep 17 00:00:00 2001 From: Shankari Date: Sat, 21 Dec 2024 17:50:49 -0800 Subject: [PATCH] Refactored for improved timing -- using stats from server profile **Overview** Enhances the user profile data retrieval process by incorporating newly stored statistics (`pipeline_range`, `total_trips`, `labeled_trips`, and `last_call_ts`) into the existing `with` block. These statistics are now seamlessly integrated into the `user` dictionary, providing a more comprehensive set of user metrics for further analysis and dashboard visualization. 1. **Enhanced Profile Data Retrieval** - **Existing Functionality**: - The `with` block previously retrieved basic profile information such as `platform`, `manufacturer`, `app_version`, `os_version`, and `phone_lang` from the database and assigned them to the `user` dictionary. - **New Enhancements**: - **Pipeline Range**: - Retrieved `pipeline_range` data, including `start_ts` and `end_ts`, to understand the timeframe of the user's activity pipeline; these values now populate `first_trip` and `last_trip`. - **Trip Counts**: - Extracted `total_trips` and `labeled_trips` to provide insights into the user's trip data and the extent of trip labeling. - **Last API Call Timestamp**: - Obtained `last_call_ts` to track the most recent API interaction by the user. - **Optional Formatting**: - Stored a formatted (`YYYY-MM-DD`) version of the `last_call_ts` timestamp as `last_call` for better readability in reports and dashboards. 2. **Updated `with` Block Structure** - **Integration of New Statistics**: - The `with` block now not only fetches and assigns basic profile attributes but also retrieves and assigns the newly stored statistics from the database. Additional commits squashed into this: - Accidentally removed the process loop - Simplified else block for user data - Merge - Addressed comment, modified else if block - Renamed `valid_uuids_columns` to `VALID_UUIDS_COLS` so it is uniform with the other constants. 
Updated ADD USER STATS --- utils/constants.py | 2 +- utils/db_utils.py | 74 ++++++++++++++++---------------------------- utils/permissions.py | 2 +- 3 files changed, 28 insertions(+), 50 deletions(-) diff --git a/utils/constants.py b/utils/constants.py index 877c020c..b6bed367 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -35,7 +35,7 @@ "inferred_section_summary", ] -valid_uuids_columns = [ +VALID_UUIDS_COLS = [ 'user_token', 'user_id', 'update_ts', diff --git a/utils/db_utils.py b/utils/db_utils.py index 387b45c6..1c8fc2d5 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -437,57 +437,35 @@ def add_user_stats(user_data, batch_size=5): def process_user(user): with ect.Timer() as process_user_timer: user_uuid = UUID(user['user_id']) - - # Fetch aggregated data for all users once and cache it - ts_aggregate = esta.TimeSeries.get_aggregate_time_series() - - # Fetch data for the user, cached for repeated queries profile_data = edb.get_profile_db().find_one({'user_id': user_uuid}) + # Fetch data for the user, cached for repeated queries + logging.info(f'keyspr: {profile_data}') + if not profile_data: + profile_data = {} + # Assign existing profile attributes to the user dictionary + user['platform'] = profile_data.get('curr_platform') + user['manufacturer'] = profile_data.get('manufacturer') + user['app_version'] = profile_data.get('client_app_version') + user['os_version'] = profile_data.get('client_os_version') + user['phone_lang'] = profile_data.get('phone_lang') - total_trips = ts_aggregate.find_entries_count( - key_list=["analysis/confirmed_trip"], - extra_query_list=[{'user_id': user_uuid}] - ) - labeled_trips = ts_aggregate.find_entries_count( - key_list=["analysis/confirmed_trip"], - extra_query_list=[{'user_id': user_uuid}, {'data.user_input': {'$ne': {}}}] - ) + # Assign newly stored statistics to the user dictionary + user['total_trips'] = profile_data.get('total_trips') + user['labeled_trips'] = profile_data.get('labeled_trips') - 
user['total_trips'] = total_trips - user['labeled_trips'] = labeled_trips - - if profile_data: - user['platform'] = profile_data.get('curr_platform') - user['manufacturer'] = profile_data.get('manufacturer') - user['app_version'] = profile_data.get('client_app_version') - user['os_version'] = profile_data.get('client_os_version') - user['phone_lang'] = profile_data.get('phone_lang') - - if total_trips > 0: - ts = esta.TimeSeries.get_time_series(user_uuid) - first_trip_ts = ts.get_first_value_for_field( - key='analysis/confirmed_trip', - field='data.end_ts', - sort_order=pymongo.ASCENDING - ) - if first_trip_ts != -1: - user['first_trip'] = arrow.get(first_trip_ts).format(time_format) - - last_trip_ts = ts.get_first_value_for_field( - key='analysis/confirmed_trip', - field='data.end_ts', - sort_order=pymongo.DESCENDING - ) - if last_trip_ts != -1: - user['last_trip'] = arrow.get(last_trip_ts).format(time_format) - - last_call_ts = ts.get_first_value_for_field( - key='stats/server_api_time', - field='data.ts', - sort_order=pymongo.DESCENDING - ) - if last_call_ts != -1: - user['last_call'] = arrow.get(last_call_ts).format(time_format) + # Retrieve and assign pipeline range + pipeline_range = profile_data.get('pipeline_range', {}) + start_ts = pipeline_range.get('start_ts') + end_ts = pipeline_range.get('end_ts') + if start_ts: + user['first_trip'] = arrow.get(start_ts).format(time_format) + if end_ts: + user['last_trip'] = arrow.get(end_ts).format(time_format) + + # Retrieve and assign last API call timestamp + last_call_ts = profile_data.get('last_call_ts') + if last_call_ts: + user['last_call'] = arrow.get(last_call_ts).format('YYYY-MM-DD') esdsq.store_dashboard_time( "admin/db_utils/add_user_stats/process_user", diff --git a/utils/permissions.py b/utils/permissions.py index 2ae9c9bb..444d0cda 100644 --- a/utils/permissions.py +++ b/utils/permissions.py @@ -93,7 +93,7 @@ def get_allowed_trip_columns(): def get_uuids_columns(): - columns = 
set(constants.valid_uuids_columns) + columns = set(constants.VALID_UUIDS_COLS) for column in permissions.get("data_uuids_columns_exclude", []): columns.discard(column) return columns