From 64d65f09a7c1509e36dcfbde609290bdee00acec Mon Sep 17 00:00:00 2001 From: Pritam Sukumar Date: Fri, 23 Feb 2024 11:40:12 +0530 Subject: [PATCH 1/3] [PPS] Fixing unique users (attempted) count --- .gitignore | 3 +++ app/db/quiz_db.py | 58 +++++++++++++++++++++++++++++++++++------------ app/main.py | 1 - 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index b2b37f3..c83acda 100644 --- a/.gitignore +++ b/.gitignore @@ -161,3 +161,6 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ .vscode/settings.json + +# Zed specific +.pyrighconfig.json diff --git a/app/db/quiz_db.py b/app/db/quiz_db.py index 41de16f..51592a7 100644 --- a/app/db/quiz_db.py +++ b/app/db/quiz_db.py @@ -54,16 +54,20 @@ def get_live_quiz_stats(self, quiz_id, start_date=None, end_date=None): else: end_datetime = datetime.now() oid_end = self.__generate_objectid_for_time(end_datetime) + + # Get documents only matching quiz ID pipeline.append({"$match": {"_id": {"$lte": oid_end}}}) + # Group by user_id and date to get unique sessions per user per day + # Note that we don't need to group by quiz because the documents are + # already matched to this quiz. 
+ # Also include data of how many of these users finished the quiz pipeline.extend( [ { - # Group by user_id, quiz_id, and date to get unique sessions per user per day "$group": { "_id": { "user_id": "$user_id", - "quiz_id": "$quiz_id", "date": { "$dateToString": { "format": "%Y-%m-%d", @@ -73,25 +77,36 @@ def get_live_quiz_stats(self, quiz_id, start_date=None, end_date=None): }, "hasQuizEnded": {"$max": "$has_quiz_ended"}, } - }, + } + ] + ) + + # Group by date again to count how many users attempted/finished the quiz each day + pipeline.extend( + [ { - # Group by date to count unique sessions per day "$group": { "_id": "$_id.date", + "totalUniqueUsers": {"$addToSet": "$_id.user_id"}, "uniqueSessions": {"$sum": 1}, "finishedSessions": { "$sum": {"$cond": [{"$eq": ["$hasQuizEnded", True]}, 1, 0]} }, } - }, - { - # Sort the results by date - "$sort": {"_id": -1} - }, + } + ] + ) + + # Sort by date and format. + # TODO: Projecting may not be necessary at this stage. But it doesn't affect + # efficiency and makes the following steps cleaner + pipeline.extend( + [ + {"$sort": {"_id": -1}}, { - # Format the output "$project": { "_id": 0, + "totalUniqueUsers": 1, "date": "$_id", "uniqueSessions": 1, "finishedSessions": 1, @@ -100,12 +115,13 @@ def get_live_quiz_stats(self, quiz_id, start_date=None, end_date=None): ] ) - # Get the total sessions as sum of unique sessions per day + # After this stage data.totalUniqueUsers will be an array of arrays, + # each array containing the user_ids for that particular day pipeline.append( { "$group": { "_id": None, - "totalSessions": {"$sum": "$uniqueSessions"}, + "totalUniqueUsers": {"$push": "$_id.user_id"}, "totalFinishedSessions": {"$sum": "$finishedSessions"}, "data": {"$push": "$$ROOT"}, } @@ -113,21 +129,35 @@ def get_live_quiz_stats(self, quiz_id, start_date=None, end_date=None): ) # Reshape the final output to get a nice dictionary + # To find the unique user count (output as totalSessions) -- + # 1. Read data.totalUniqueUsers to get an array of per-day arrays + # 2. 
Reduce with $concatArrays to flatten it into one big array + # 3. Use $setUnion to keep only unique values and $size to count them pipeline.append( { "$project": { "_id": 0, - "totalSessions": 1, "totalFinishedSessions": 1, "quizTitle": 1, "daywise_results": "$data", + "totalSessions": { + "$size": { + "$setUnion": { + "$reduce": { + "input": "$data.totalUniqueUsers", + "initialValue": [], + "in": {"$concatArrays": ["$$value", "$$this"]}, + } + } + } + }, } } ) # Run the pipeline daywise_results = list(self.__db.quiz.sessions.aggregate(pipeline)) - + print(daywise_results) if len(daywise_results) > 0: daywise_results = daywise_results[0] else: diff --git a/app/main.py b/app/main.py index 29dc707..e279c21 100644 --- a/app/main.py +++ b/app/main.py @@ -34,7 +34,6 @@ allow_headers=["*"], ) - student_quiz_reports_db = ReportsDB(reports_db) quiz_db = QuizDB(quiz_db) sessions_db = SessionsDB() From 3d7685eaad445d263dbd165066c85c3ac00a3d6f Mon Sep 17 00:00:00 2001 From: Pritam Sukumar Date: Fri, 23 Feb 2024 11:42:04 +0530 Subject: [PATCH 2/3] [PPS] Removing print --- app/db/quiz_db.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/db/quiz_db.py b/app/db/quiz_db.py index 51592a7..c7281e8 100644 --- a/app/db/quiz_db.py +++ b/app/db/quiz_db.py @@ -157,7 +157,6 @@ def get_live_quiz_stats(self, quiz_id, start_date=None, end_date=None): # Run the pipeline daywise_results = list(self.__db.quiz.sessions.aggregate(pipeline)) - print(daywise_results) if len(daywise_results) > 0: daywise_results = daywise_results[0] else: From 6abb1a54ef0b25b636330e5a44958eae3d48089f Mon Sep 17 00:00:00 2001 From: Pritam Sukumar Date: Fri, 23 Feb 2024 13:18:44 +0530 Subject: [PATCH 3/3] [PPS] Removing unnecessary line --- app/db/quiz_db.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/db/quiz_db.py b/app/db/quiz_db.py index c7281e8..b7f3fb9 100644 --- a/app/db/quiz_db.py +++ b/app/db/quiz_db.py @@ -121,7 +121,6 @@ def get_live_quiz_stats(self, quiz_id, start_date=None, end_date=None): { "$group": { "_id": None, - "totalUniqueUsers": 
{"$push": "$_id.user_id"}, "totalFinishedSessions": {"$sum": "$finishedSessions"}, "data": {"$push": "$$ROOT"}, }