Skip to content

Commit

Permalink
WIP analytics:
Browse files Browse the repository at this point in the history
* models/analytics.py
 - added HourlyUserSummary and HourlyGroupSummary
   these models have an additional field "hour"
 - renamed UserDailySummary and GroupDailySummary to DailyUserSummary and DailyGroupSummary
 - added last_summarized_at to Session
 - renamed the field compiled to summarized in Event, DailyUserSummary, DailyGroupSummary;
   the new models HourlyUserSummary and HourlyGroupSummary use the summarized field as well
* updates and renames migrations to reflect the model and field name changes
* management command askbot_compile_analytics_events
 - populates HourlyUserSummary and HourlyGroupSummary
   in addition to DailyUserSummary and DailyGroupSummary
 - HourlyUserSummary and HourlyGroupSummary are summarized
   only after the hour is completed
  • Loading branch information
evgenyfadeev committed Jul 30, 2024
1 parent 9a9fc37 commit d8de648
Show file tree
Hide file tree
Showing 7 changed files with 239 additions and 69 deletions.
165 changes: 121 additions & 44 deletions askbot/management/commands/askbot_compile_analytics_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
per-user and per-group Summary tables.
"""
import datetime
from django.db import transaction
from django.db import models, transaction
from django.core.management.base import BaseCommand
from askbot.utils.console import ProgressBar
from askbot.models.analytics import Event, GroupDailySummary, UserDailySummary
from askbot.models.analytics import (
Event, DailyGroupSummary, HourlyGroupSummary,
DailyUserSummary, HourlyUserSummary, Session
)

class Command(BaseCommand): # pylint: disable=missing-class-docstring, too-few-public-methods

Expand All @@ -15,70 +18,144 @@ def add_arguments(self, parser): # pylint: disable=missing-function-docstring

def handle(self, *args, **options): # pylint: disable=missing-function-docstring
"""
Filters uncompiled analytics events.
Filters unsummarized analytics events.
Iterates over the events, and calculates per user summaries
per date.
Then iterates over the per-user summaries and combines them
into the per-group summaries.
"""
events = Event.objects.filter(compiled=False).order_by('timestamp') # pylint: disable=no-member
# One timestamp is taken up front and handed to both compile_* stages
# below as their cutoff time.
# NOTE(review): datetime.datetime.now() yields a naive local time - confirm
# that it is comparable with the stored `hour` values.
now = datetime.datetime.now()
# The four stages run in this fixed order, each receiving the command options.
self.summarize_events(options) # to hourly user summaries
self.extract_time_on_site_from_sessions(options)
self.compile_hourly_user_summaries(options, now) # to daily user and hourly group summaries
self.compile_hourly_group_summaries(options, now)


def summarize_events(self, options):
"""Compiles events into daily per-user summaries"""
# NOTE(review): summarize_event() as shown in this file writes to
# HourlyUserSummary, so "daily" in the docstring above may be stale - confirm.
# Events that are not yet flagged as summarized, oldest first.
events = Event.objects.filter(summarized=False).order_by('timestamp') # pylint: disable=no-member
# The row count is passed to ProgressBar together with the iterator.
events_count = events.count()
message = 'Compiling Events:'
# options['silent'] is forwarded to ProgressBar unchanged.
silent = options['silent']
# 1) Populate daily summaries per user
for event in ProgressBar(events.iterator(), events_count, message=message, silent=silent):
self.compile_event(event)

daily_summaries = UserDailySummary.objects.filter(compiled=False).order_by('date') # pylint: disable=no-member
message = 'Compiling User Daily Summaries:'
summaries_count = daily_summaries.count()
iterator = daily_summaries.iterator()
for daily_summary in ProgressBar(iterator, summaries_count, message=message, silent=silent):
self.compile_user_daily_summary(daily_summary)

# todo:
# update the time on site (how?)
# update the total number of users per group
# maybe: record number of active users per group within period
message = 'Count users per group:'
group_daily_summaries = GroupDailySummary.objects.filter(compiled=False) # pylint: disable=no-member
count = group_daily_summaries.count()
iterator = group_daily_summaries.iterator() # pylint: disable=no-member
for group_summary in ProgressBar(iterator, count, message=message, silent=silent):
self.update_users_count_per_group(group_summary)
# Each event is handed to the per-event summarizer.
self.summarize_event(event)


@transaction.atomic
def update_users_count_per_group(self, group_summary):
    """Store the group's user count as of the end of the summary's day.

    Counts the users of ``group_summary.group`` whose ``date_joined`` is not
    later than ``group_summary.date`` plus one day, writes the result to
    ``num_users``, flags the summary as compiled and saves it.
    """
    day_end = group_summary.date + datetime.timedelta(days=1)
    members = group_summary.group.user_set  # pylint: disable=no-member
    group_summary.num_users = members.filter(date_joined__lte=day_end).count()
    group_summary.compiled = True
    group_summary.save()


@transaction.atomic
def compile_event(self, event):
def summarize_event(self, event):
"""Adds up event stats into the user daily summary"""
date = event.timestamp.date()
# Truncate the event timestamp to the top of its hour; this value keys
# the hourly per-user summary row.
hour = event.timestamp.replace(minute=0, second=0, microsecond=0)
# The summary belongs to the user of the session that produced the event.
user = event.session.user
user_summary, _ = UserDailySummary.objects.get_or_create(date=date, # pylint: disable=no-member
user_summary, _ = HourlyUserSummary.objects.get_or_create(hour=hour, # pylint: disable=no-member
user=user)
# add_event() is defined on the summary model (not visible here).
user_summary.add_event(event)
user_summary.save()
# The flag is set with a queryset update filtered by the event id.
Event.objects.filter(id=event.id).update(compiled=True) # pylint: disable=no-member
Event.objects.filter(id=event.id).update(summarized=True) # pylint: disable=no-member


def extract_time_on_site_from_sessions(self, options):
    """Refresh time-on-site totals from sessions changed since their last summarization.

    Selects the sessions whose ``last_summarized_at`` is earlier than their
    ``updated_at``, ordered by ``updated_at``, and passes each of them to
    ``extract_time_on_site_from_session``.
    """
    pending = Session.objects.filter(  # pylint: disable=no-member
        last_summarized_at__lt=models.F('updated_at')
    ).order_by('updated_at')
    progress = ProgressBar(
        pending.iterator(),
        pending.count(),
        message='Updating the time on site:',
        silent=options['silent'],
    )
    for pending_session in progress:
        self.extract_time_on_site_from_session(pending_session)


@transaction.atomic
def compile_user_daily_summary(self, user_daily_summary):
groups = user_daily_summary.user.get_groups(used_for_analytics=True)
def extract_time_on_site_from_session(self, session):
    """Add the not-yet-counted part of a session to the hourly per-user summaries.

    The interval between the previous summarization point and
    ``session.updated_at`` is split at hour boundaries. Each slice is added
    to ``time_on_site`` of the ``HourlyUserSummary`` row for that hour and
    the session's user; rows are created on demand. Finally
    ``session.last_summarized_at`` is advanced to ``session.updated_at``
    and the session is saved.

    Does nothing when the session has not been updated since it was last
    summarized.
    """
    if session.updated_at <= session.last_summarized_at:
        return

    # Resume from the previous summarization point. Restarting from
    # created_at would add the already counted part of the session again
    # each time the session is updated. max() guards against a
    # last_summarized_at that predates the session itself.
    sess_start = max(session.created_at, session.last_summarized_at)
    sess_end = session.updated_at
    user_id = session.user_id
    one_hour = datetime.timedelta(hours=1)
    hour = sess_start.replace(minute=0, second=0, microsecond=0)
    # Strict comparison: a session that ends exactly on an hour boundary
    # contributes nothing to the following hour, so no empty row is created.
    # NOTE(review): a slice may land on an hourly row that is already flagged
    # as summarized; whether that time still reaches the daily summary
    # depends on code outside this method - confirm.
    while hour < sess_end:
        window_start = max(sess_start, hour)
        window_end = min(sess_end, hour + one_hour)
        summary, _ = HourlyUserSummary.objects.get_or_create(hour=hour, user_id=user_id)  # pylint: disable=no-member
        summary.time_on_site += window_end - window_start
        summary.save()
        hour += one_hour

    session.last_summarized_at = sess_end
    session.save()


def compile_hourly_user_summaries(self, options, cutoff_time):
    """Feed every finished, unsummarized hourly user summary to the compiler.

    An hourly row qualifies when it is not flagged as summarized and its
    ``hour`` is strictly earlier than ``cutoff_time`` truncated to the top
    of the hour. Rows are processed in ascending ``hour`` order by
    ``compile_hourly_user_summary``.
    """
    unsummarized = HourlyUserSummary.objects.filter(summarized=False)  # pylint: disable=no-member
    # Rows for the hour containing cutoff_time are still incomplete and are skipped.
    current_hour = cutoff_time.replace(minute=0, second=0, microsecond=0)
    pending = unsummarized.filter(hour__lt=current_hour).order_by('hour')
    total = pending.count()
    silent = options['silent']
    progress = ProgressBar(
        pending.iterator(),
        total,
        message='Compiling User Hourly Summaries:',
        silent=silent,
    )
    for summary in progress:
        self.compile_hourly_user_summary(summary)


@transaction.atomic
def compile_hourly_user_summary(self, hourly_user_summary):
"""Adds up user hourly summaries into the user daily summaries
and the group hourly summaries"""
# Groups come from the user's get_groups(used_for_analytics=True).
groups = hourly_user_summary.user.get_groups(used_for_analytics=True)
hour = hourly_user_summary.hour
# For each of those groups the hourly user row is added to the group's
# summary row, which is created on demand.
# NOTE(review): `+=` relies on in-place addition defined on the summary
# models (not visible here); presumably it sums the counters - confirm.
for group in groups:
date = user_daily_summary.date
group_summary, _ = GroupDailySummary.objects.get_or_create(date=date, # pylint: disable=no-member
group=group)
group_summary += user_daily_summary
group_summary, _ = HourlyGroupSummary.objects.get_or_create(hour=hour, # pylint: disable=no-member
group=group)
group_summary += hourly_user_summary
group_summary.save()

UserDailySummary.objects.filter(id=user_daily_summary.id).update(compiled=True) # pylint: disable=no-member
# The same hourly row is added to the user's daily summary for the
# calendar date of the hour.
daily_user_summary, _ = DailyUserSummary.objects.get_or_create( # pylint: disable=no-member
date=hourly_user_summary.hour.date(),
user=hourly_user_summary.user)
daily_user_summary += hourly_user_summary
daily_user_summary.save()

# The hourly row is flagged with a queryset update filtered by its id.
HourlyUserSummary.objects.filter(id=hourly_user_summary.id).update(summarized=True) # pylint: disable=no-member


def compile_hourly_group_summaries(self, options, cutoff_time):
    """
    Feed every finished, unsummarized hourly group summary to
    ``compile_hourly_group_summary``.

    An hourly row qualifies when it is not flagged as summarized and its
    ``hour`` is strictly earlier than ``cutoff_time`` truncated to the top
    of the hour.
    """
    unsummarized = HourlyGroupSummary.objects.filter(summarized=False)  # pylint: disable=no-member
    # Rows for the hour containing cutoff_time are still incomplete and are skipped.
    current_hour = cutoff_time.replace(minute=0, second=0, microsecond=0)
    pending = unsummarized.filter(hour__lt=current_hour)
    total = pending.count()
    rows = pending.iterator()
    progress = ProgressBar(
        rows,
        total,
        message='Compile hourly group summaries: ',
        silent=options['silent'],
    )
    for hourly_row in progress:
        self.compile_hourly_group_summary(hourly_row)


@transaction.atomic
def compile_hourly_group_summary(self, hourly_group_summary):
    """
    Finalize one hourly group summary.

    Sets ``num_users`` to the number of the group's users whose
    ``date_joined`` is earlier than the end of the hour, adds the hourly
    row to the daily group summary for the calendar date of that hour
    (created on demand), then flags the hourly row as summarized and saves it.
    """
    group = hourly_group_summary.group
    hour_start = hourly_group_summary.hour
    hour_end = hour_start + datetime.timedelta(hours=1)
    members = group.user_set.filter(date_joined__lt=hour_end)  # pylint: disable=no-member
    hourly_group_summary.num_users = members.count()

    daily, _ = DailyGroupSummary.objects.get_or_create(  # pylint: disable=no-member
        date=hour_start.date(),
        group=group,
    )
    daily += hourly_group_summary
    daily.save()

    hourly_group_summary.summarized = True
    hourly_group_summary.save()
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class Migration(migrations.Migration):

operations = [
migrations.CreateModel(
name='UserDailySummary',
name='DailyUserSummary',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('num_questions', models.PositiveIntegerField(default=0)),
Expand All @@ -31,7 +31,7 @@ class Migration(migrations.Migration):
},
),
migrations.CreateModel(
name='GroupDailySummary',
name='DailyGroupSummary',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('num_questions', models.PositiveIntegerField(default=0)),
Expand Down
2 changes: 1 addition & 1 deletion askbot/migrations/0029_group_visibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
class Migration(migrations.Migration):

dependencies = [
('askbot', '0028_userdailysummary_groupdailysummary'),
('askbot', '0028_dailyusersummary_dailygroupsummary'),
]

operations = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@ class Migration(migrations.Migration):
operations = [
migrations.AddField(
model_name='event',
name='compiled',
field=models.BooleanField(default=False, help_text='True if the event is compiled into a summary'),
name='summarized',
field=models.BooleanField(default=False, help_text='True if the event is included into a summary'),
),
migrations.AddField(
model_name='group',
name='used_for_analytics',
field=models.BooleanField(default=False),
),
migrations.AddField(
model_name='groupdailysummary',
name='compiled',
model_name='dailygroupsummary',
name='summarized',
field=models.BooleanField(default=False),
),
migrations.AddField(
model_name='userdailysummary',
name='compiled',
model_name='dailyusersummary',
name='summarized',
field=models.BooleanField(default=False),
),
]
29 changes: 29 additions & 0 deletions askbot/migrations/0031_session_last_summarized_at.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 4.2.4 on 2024-07-22 15:52
# edited by hand to add the populate_last_summarized_at function
# and remove the null=True from the field
from django.db import migrations, models

def populate_last_summarized_at(apps, schema_editor):
    """Back-fill ``Session.last_summarized_at`` with each row's ``created_at``.

    Uses the historical ``Session`` model from ``apps`` and runs on the
    database connection that is being migrated, so the back-fill also
    reaches the right database when several are configured.
    """
    Session = apps.get_model('askbot', 'Session')
    db_alias = schema_editor.connection.alias
    Session.objects.using(db_alias).update(last_summarized_at=models.F('created_at'))


class Migration(migrations.Migration):
# Adds Session.last_summarized_at in three steps, so that existing rows
# have a value before the column stops accepting NULL.

dependencies = [
('askbot', '0030_event_summarized_group_used_for_analytics_and_more'),
]

operations = [
# 1) Add the column as nullable.
migrations.AddField(
model_name='session',
name='last_summarized_at',
field=models.DateTimeField(null=True),
),
# 2) Fill existing rows via populate_last_summarized_at; reversing this
#    step is a no-op.
migrations.RunPython(populate_last_summarized_at, reverse_code=migrations.RunPython.noop),
# 3) Disallow NULL now that the rows have been populated.
migrations.AlterField(
model_name='session',
name='last_summarized_at',
field=models.DateTimeField(null=False)
),
]
54 changes: 54 additions & 0 deletions askbot/migrations/0032_hourlyusersummary_hourlygroupsummary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Generated by Django 4.2.4 on 2024-07-28 21:52

import datetime
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
# Creates the HourlyUserSummary and HourlyGroupSummary models.

dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('askbot', '0031_session_last_summarized_at'),
]

operations = [
# Per-user hourly row: counters, time on site, the `summarized` flag and
# an indexed `hour` timestamp.
migrations.CreateModel(
name='HourlyUserSummary',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('num_questions', models.PositiveIntegerField(default=0)),
('num_answers', models.PositiveIntegerField(default=0)),
('num_upvotes', models.PositiveIntegerField(default=0)),
('num_downvotes', models.PositiveIntegerField(default=0)),
('question_views', models.PositiveIntegerField(default=0)),
('time_on_site', models.DurationField(default=datetime.timedelta(0))),
('summarized', models.BooleanField(default=False)),
('hour', models.DateTimeField(db_index=True)),
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
],
options={
'abstract': False,
},
),
# Per-group hourly row: the same fields as the per-user row, plus
# `num_users`, with a foreign key to askbot.group instead of the user.
migrations.CreateModel(
name='HourlyGroupSummary',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('num_questions', models.PositiveIntegerField(default=0)),
('num_answers', models.PositiveIntegerField(default=0)),
('num_upvotes', models.PositiveIntegerField(default=0)),
('num_downvotes', models.PositiveIntegerField(default=0)),
('question_views', models.PositiveIntegerField(default=0)),
('time_on_site', models.DurationField(default=datetime.timedelta(0))),
('summarized', models.BooleanField(default=False)),
('hour', models.DateTimeField(db_index=True)),
('num_users', models.PositiveIntegerField(default=0)),
('group', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='askbot.group')),
],
options={
'abstract': False,
},
),
]
Loading

0 comments on commit d8de648

Please sign in to comment.