Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/fixing unique users count #44

Merged
merged 4 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,6 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.vscode/settings.json

# Zed specific
.pyrighconfig.json
57 changes: 43 additions & 14 deletions app/db/quiz_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,20 @@ def get_live_quiz_stats(self, quiz_id, start_date=None, end_date=None):
else:
end_datetime = datetime.now()
oid_end = self.__generate_objectid_for_time(end_datetime)

# Get documents only matching quiz ID
pipeline.append({"$match": {"_id": {"$lte": oid_end}}})

# Group by user_id and date to get unique sessions per user per day
# Note that we don't need to group by quiz because the documents already
# are matched to this quiz.
# Also include data of how many of these users finished the quiz
pipeline.extend(
[
{
# Group by user_id, quiz_id, and date to get unique sessions per user per day
"$group": {
"_id": {
"user_id": "$user_id",
"quiz_id": "$quiz_id",
"date": {
"$dateToString": {
"format": "%Y-%m-%d",
Expand All @@ -73,25 +77,36 @@ def get_live_quiz_stats(self, quiz_id, start_date=None, end_date=None):
},
"hasQuizEnded": {"$max": "$has_quiz_ended"},
}
},
}
]
)

# Group by date again to count how many users attempted/finished the quiz each day
pipeline.extend(
[
{
# Group by date to count unique sessions per day
"$group": {
"_id": "$_id.date",
"totalUniqueUsers": {"$addToSet": "$_id.user_id"},
"uniqueSessions": {"$sum": 1},
"finishedSessions": {
"$sum": {"$cond": [{"$eq": ["$hasQuizEnded", True]}, 1, 0]}
},
}
},
{
# Sort the results by date
"$sort": {"_id": -1}
},
}
]
)

# Sort by date and format.
# TODO: Projecting may not be necessary at this stage. But it doesn't affect
# efficiency and makes the following steps cleaner
pipeline.extend(
[
{"$sort": {"_id": -1}},
{
# Format the output
"$project": {
"_id": 0,
"totalUniqueUsers": 1,
"date": "$_id",
"uniqueSessions": 1,
"finishedSessions": 1,
Expand All @@ -100,34 +115,48 @@ def get_live_quiz_stats(self, quiz_id, start_date=None, end_date=None):
]
)

# Get the total sessions as sum of unique sessions per day
# After this stage totalUniqueUsers will be an array of arrays
# each array containing the user_ids for that particular day
pipeline.append(
{
"$group": {
"_id": None,
"totalSessions": {"$sum": "$uniqueSessions"},
"totalUniqueUsers": {"$push": "$_id.user_id"},
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is user_id here? user_id doesn't exist after the previous pipeline stages, right? I ran this code and totalUniqueUsers is coming back as an empty array.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, maybe I can remove this. It's a remnant from some previous attempts.

"totalFinishedSessions": {"$sum": "$finishedSessions"},
"data": {"$push": "$$ROOT"},
}
}
)

# Reshape the final output to get a nice dictionary
# To find the totalUniqueUsersCount --
# 1. "$data.totalUniqueUsers" yields an array of arrays (one user_id array per day)
# 2. $reduce with $concatArrays flattens it into one big array
# 3. $setUnion keeps only the unique values, and $size counts them
pipeline.append(
{
"$project": {
"_id": 0,
"totalSessions": 1,
"totalFinishedSessions": 1,
"quizTitle": 1,
"daywise_results": "$data",
"totalSessions": {
"$size": {
"$setUnion": {
"$reduce": {
"input": "$data.totalUniqueUsers",
"initialValue": [],
"in": {"$concatArrays": ["$$value", "$$this"]},
}
}
}
},
}
}
)

# Run the pipeline
daywise_results = list(self.__db.quiz.sessions.aggregate(pipeline))

if len(daywise_results) > 0:
daywise_results = daywise_results[0]
else:
Expand Down
1 change: 0 additions & 1 deletion app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
allow_headers=["*"],
)


student_quiz_reports_db = ReportsDB(reports_db)
quiz_db = QuizDB(quiz_db)
sessions_db = SessionsDB()
Expand Down
Loading