From ab513a8398a8cfa4df899e45de1945326372d0bd Mon Sep 17 00:00:00 2001 From: Vincent Porte Date: Tue, 25 Jun 2024 16:05:25 +0200 Subject: [PATCH] =?UTF-8?q?feat(stats):=20collecter=20les=20stats=20d'acti?= =?UTF-8?q?vit=C3=A9=20hebdo=20des=20forums=20de=20la=20documentation=20(#?= =?UTF-8?q?691)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description 🎸 Interroger matomo pour collecter le nombre de visites uniques, le nombre de visites uniques entrantes et le temps passé sur les `forum` de l'espace documentation hebdomadairement 🎸 Management command planifiée chaque lundi 🎸 Capacité à reprendre un traitement échoué, en recherchant la plus récente date d'execution dans `ForumStat` ## Type de changement 🎢 Nouvelle fonctionnalité (changement non cassant qui ajoute une fonctionnalité). ### Points d'attention 🦺 Collecte limitée aux `forum` de l'espace documentation 🦺 La collecte des données mensuelles est prévue dans une PR ultérieure 🦺 harmonisation de la déclaration du modèle `Stat` dans le fichier `admin.py` --- .../collect_weekly_matomo_forum_stats.sh | 18 ++ clevercloud/cron.json | 1 + lacommunaute/stats/admin.py | 9 +- lacommunaute/stats/factories.py | 15 +- .../commands/collect_matomo_forum_stats.py | 28 +++ .../stats/migrations/0002_forumstat.py | 44 +++++ lacommunaute/stats/models.py | 29 ++++ lacommunaute/stats/tests/tests_models.py | 23 ++- lacommunaute/utils/date.py | 8 + lacommunaute/utils/matomo.py | 70 +++++++- lacommunaute/utils/tests/tests_utils.py | 162 +++++++++++++++++- 11 files changed, 398 insertions(+), 9 deletions(-) create mode 100755 clevercloud/collect_weekly_matomo_forum_stats.sh create mode 100644 lacommunaute/stats/management/commands/collect_matomo_forum_stats.py create mode 100644 lacommunaute/stats/migrations/0002_forumstat.py create mode 100644 lacommunaute/utils/date.py diff --git a/clevercloud/collect_weekly_matomo_forum_stats.sh b/clevercloud/collect_weekly_matomo_forum_stats.sh new 
file mode 100755 index 000000000..494ddf3e4 --- /dev/null +++ b/clevercloud/collect_weekly_matomo_forum_stats.sh @@ -0,0 +1,18 @@ +#!/bin/bash -l + +# Collect Weekly Matomo Forum Stats + +# +# About clever cloud cronjobs: +# https://www.clever-cloud.com/doc/tools/crons/ +# + +if [[ "$INSTANCE_NUMBER" != "0" ]]; then + echo "Instance number is ${INSTANCE_NUMBER}. Stop here." + exit 0 +fi + +# $APP_HOME is set by default by clever cloud. +cd $APP_HOME + +python manage.py collect_matomo_forum_stats diff --git a/clevercloud/cron.json b/clevercloud/cron.json index 8b6386ab9..8b52e8ed7 100644 --- a/clevercloud/cron.json +++ b/clevercloud/cron.json @@ -4,6 +4,7 @@ "0 5 * * * $ROOT/clevercloud/collect_daily_matomo_stats.sh", "3 5 * * * $ROOT/clevercloud/collect_daily_django_stats.sh", "5 5 1 * * $ROOT/clevercloud/collect_monthly_matomo_stats.sh", + "8 5 * * 1 $ROOT/clevercloud/collect_weekly_matomo_forum_stats.sh", "5 7-21 * * * $ROOT/clevercloud/send_notifs_when_first_reply.sh", "5 6 * * * $ROOT/clevercloud/send_notifs_when_following_replies.sh", "10 6-22 * * * $ROOT/clevercloud/add_user_to_list_when_register.sh", diff --git a/lacommunaute/stats/admin.py b/lacommunaute/stats/admin.py index 7c5f79215..ff5395f09 100644 --- a/lacommunaute/stats/admin.py +++ b/lacommunaute/stats/admin.py @@ -1,11 +1,16 @@ from django.contrib import admin -from lacommunaute.stats.models import Stat +from lacommunaute.stats.models import ForumStat, Stat +@admin.register(Stat) class StatAdmin(admin.ModelAdmin): list_display = ("name", "date", "value", "period") list_filter = ("name", "date", "period") -admin.site.register(Stat, StatAdmin) +@admin.register(ForumStat) +class ForumStatAdmin(admin.ModelAdmin): + list_display = ("date", "period", "forum", "visits", "entry_visits", "time_spent") + list_filter = ("date", "period", "forum") + raw_id_fields = ("forum",) diff --git a/lacommunaute/stats/factories.py b/lacommunaute/stats/factories.py index 7bc0b5385..b4221b5c1 100644 --- 
a/lacommunaute/stats/factories.py +++ b/lacommunaute/stats/factories.py @@ -3,8 +3,9 @@ import factory import factory.django +from lacommunaute.forum.factories import ForumFactory from lacommunaute.stats.enums import Period -from lacommunaute.stats.models import Stat +from lacommunaute.stats.models import ForumStat, Stat class StatFactory(factory.django.DjangoModelFactory): @@ -23,3 +24,15 @@ class Params: value=46, period="day", ) + + +class ForumStatFactory(factory.django.DjangoModelFactory): + date = factory.Faker("date") + period = Period.DAY + forum = factory.SubFactory(ForumFactory) + visits = factory.Faker("pyint") + entry_visits = factory.Faker("pyint") + time_spent = factory.Faker("pyint") + + class Meta: + model = ForumStat diff --git a/lacommunaute/stats/management/commands/collect_matomo_forum_stats.py b/lacommunaute/stats/management/commands/collect_matomo_forum_stats.py new file mode 100644 index 000000000..009de28d3 --- /dev/null +++ b/lacommunaute/stats/management/commands/collect_matomo_forum_stats.py @@ -0,0 +1,28 @@ +from datetime import date + +from dateutil.relativedelta import relativedelta +from django.core.management.base import BaseCommand + +from lacommunaute.stats.models import ForumStat +from lacommunaute.utils.date import get_last_sunday +from lacommunaute.utils.matomo import collect_forum_stats_from_matomo_api + + +class Command(BaseCommand): + help = "Collecter les stats des forum dans matomo, jusqu'au dimanche précédent l'execution" + + def handle(self, *args, **options): + period = "week" + + from_date = ForumStat.objects.filter(period=period).order_by("-date").first() + + if from_date: + from_date = from_date.date + relativedelta(days=7) + else: + from_date = date(2023, 10, 2) + + to_date = get_last_sunday(date.today()) + + collect_forum_stats_from_matomo_api(from_date=from_date, to_date=to_date, period=period) + + self.stdout.write(self.style.SUCCESS("That's all, folks!")) diff --git 
a/lacommunaute/stats/migrations/0002_forumstat.py b/lacommunaute/stats/migrations/0002_forumstat.py new file mode 100644 index 000000000..6ad3cd025 --- /dev/null +++ b/lacommunaute/stats/migrations/0002_forumstat.py @@ -0,0 +1,44 @@ +# Generated by Django 5.0.6 on 2024-06-24 15:02 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("forum", "0015_alter_forumrating_options"), + ("stats", "0001_initial"), + ] + + operations = [ + migrations.CreateModel( + name="ForumStat", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("date", models.DateField(verbose_name="Date")), + ( + "period", + models.CharField( + choices=[("month", "Month"), ("week", "Week"), ("day", "Day")], + max_length=10, + verbose_name="Période", + ), + ), + ("visits", models.IntegerField(default=0, verbose_name="Visites")), + ("entry_visits", models.IntegerField(default=0, verbose_name="Visites entrantes")), + ("time_spent", models.IntegerField(default=0, verbose_name="Temps passé")), + ( + "forum", + models.ForeignKey( + null=True, on_delete=django.db.models.deletion.SET_NULL, to="forum.forum", verbose_name="Forum" + ), + ), + ], + options={ + "verbose_name": "Stat de forum", + "verbose_name_plural": "Stats de forum", + "ordering": ["date", "period", "forum"], + "unique_together": {("date", "period", "forum")}, + }, + ), + ] diff --git a/lacommunaute/stats/models.py b/lacommunaute/stats/models.py index a2c4952ef..6d34d474c 100644 --- a/lacommunaute/stats/models.py +++ b/lacommunaute/stats/models.py @@ -1,5 +1,6 @@ from django.db import models +from lacommunaute.forum.models import Forum from lacommunaute.stats.enums import Period @@ -14,6 +15,10 @@ def current_month_datas(self): class Stat(models.Model): + """ + Represents a statistical data point, relative to the whole platform, for a given date and period. 
+ """ + name = models.CharField(max_length=30, verbose_name="Nom") date = models.DateField(verbose_name="Date") value = models.IntegerField(verbose_name="Valeur") @@ -31,3 +36,27 @@ def __str__(self): return f"{self.name} - {self.date} - {self.period}" objects = StatQuerySet().as_manager() + + +class ForumStat(models.Model): + """ + Represents a statistical data point, relative to a forum, for a given date and period. + """ + + date = models.DateField(verbose_name="Date") + period = models.CharField(max_length=10, verbose_name="Période", choices=Period.choices) + forum = models.ForeignKey(Forum, on_delete=models.SET_NULL, verbose_name="Forum", null=True) + visits = models.IntegerField(verbose_name="Visites", default=0) + entry_visits = models.IntegerField(verbose_name="Visites entrantes", default=0) + time_spent = models.IntegerField(verbose_name="Temps passé", default=0) + + objects = models.Manager() + + class Meta: + verbose_name = "Stat de forum" + verbose_name_plural = "Stats de forum" + ordering = ["date", "period", "forum"] + unique_together = ("date", "period", "forum") + + def __str__(self): + return f"{self.date} - {self.period} - {self.forum}" diff --git a/lacommunaute/stats/tests/tests_models.py b/lacommunaute/stats/tests/tests_models.py index a9da2e116..cb5fb6c40 100644 --- a/lacommunaute/stats/tests/tests_models.py +++ b/lacommunaute/stats/tests/tests_models.py @@ -1,3 +1,4 @@ +import pytest # noqa from dateutil.relativedelta import relativedelta from django.db import IntegrityError from django.test import TestCase @@ -5,8 +6,8 @@ from django.utils.timezone import localdate from lacommunaute.stats.enums import Period -from lacommunaute.stats.factories import StatFactory -from lacommunaute.stats.models import Stat +from lacommunaute.stats.factories import ForumStatFactory, StatFactory +from lacommunaute.stats.models import ForumStat, Stat class StatModelTest(TestCase): @@ -31,3 +32,21 @@ def test_ordering(self): def test_empty_dataset(self): 
self.assertEqual(Stat.objects.current_month_datas().count(), 0) + + +class TestForumStat: + def test_ordering(self, db): + first_forumstat = ForumStatFactory(date=localdate()) + second_forumstat = ForumStatFactory( + forum=first_forumstat.forum, + date=first_forumstat.date + relativedelta(days=1), + period=first_forumstat.period, + ) + + assert list(ForumStat.objects.all()) == [first_forumstat, second_forumstat] + + def test_uniqueness(self, db): + forumstat = ForumStatFactory() + forumstat.id = None + with pytest.raises(IntegrityError): + forumstat.save() diff --git a/lacommunaute/utils/date.py b/lacommunaute/utils/date.py new file mode 100644 index 000000000..6639a7058 --- /dev/null +++ b/lacommunaute/utils/date.py @@ -0,0 +1,12 @@ +from datetime import date + +from dateutil.relativedelta import relativedelta + + +def get_last_sunday(theday=None): + """Return the most recent Sunday on or before `theday`; `theday` defaults to today.""" + # Resolve the default at call time: a `date.today()` default would be frozen at import time. + if theday is None: + theday = date.today() + days_to_subtract = (theday.weekday() + 1) % 7 + return theday - relativedelta(days=days_to_subtract) diff --git a/lacommunaute/utils/matomo.py b/lacommunaute/utils/matomo.py index 3f4824350..9d8abc63e 100644 --- a/lacommunaute/utils/matomo.py +++ b/lacommunaute/utils/matomo.py @@ -5,7 +5,8 @@ from dateutil.relativedelta import relativedelta from django.conf import settings -from lacommunaute.stats.models import Stat +from lacommunaute.forum.models import Forum +from lacommunaute.stats.models import ForumStat, Stat def get_matomo_data( @@ -138,6 +139,42 @@ def get_matomo_events_data(period, search_date, nb_uniq_visitors_key="nb_uniq_vi return stats +def get_matomo_forums_data(period, search_date, label, ids=()): + if label is None: + raise ValueError("label must be provided") + + filtered_datas = next( + ( + data.get("subtable", []) + for data in get_matomo_data(period=period, search_date=search_date, method="Actions.getPageUrls") + if data.get("label") == label + ), + [], + ) + + stats = {} + for forum_data in filtered_datas: + forum_id = int(forum_data["label"].split("-")[-1]) if 
forum_data["label"].split("-")[-1].isdigit() else None + + if forum_id and forum_id in ids: + # ONE forum can have multiple slugs. We need to aggregate them. + stats.setdefault( + forum_id, + { + "date": search_date.strftime("%Y-%m-%d"), + "period": period, + "visits": 0, + "entry_visits": 0, + "time_spent": 0, + }, + ) + stats[forum_id]["visits"] += forum_data.get("nb_visits", 0) + stats[forum_id]["entry_visits"] += forum_data.get("entry_nb_visits", 0) + stats[forum_id]["time_spent"] += forum_data.get("sum_time_spent", 0) + + return [{"forum_id": k, **v} for k, v in stats.items()] + + def collect_stats_from_matomo_api(period="day", from_date=date(2022, 12, 5), to_date=date.today()): """ function to get stats from matomo api, day by day from 2022-10-31 to today @@ -157,3 +194,41 @@ def collect_stats_from_matomo_api(period="day", from_date=date(2022, 12, 5), to_ from_date += relativedelta(months=1) Stat.objects.bulk_create([Stat(**stat) for stat in stats]) + + +def collect_forum_stats_from_matomo_api(period="week", from_date=date(2023, 10, 2), to_date=None): + """ + Collect forum stats from the matomo api, week by week from `from_date` to `to_date`, + and store them as `ForumStat` rows. `to_date` defaults to today, resolved at call time. + """ + if period != "week": + raise ValueError("Only 'week' period is supported for forum stats collection.") + + if to_date is None: + to_date = date.today() + + forums_dict = { + forum.id: forum + for forum in Forum.objects.filter(parent__type=Forum.FORUM_CAT, level=1) + | Forum.objects.filter(type=Forum.FORUM_CAT, level=0) + } + + search_date = from_date + while search_date <= to_date: + forums_stats = get_matomo_forums_data(period, search_date, label="forum", ids=list(forums_dict.keys())) + print(f"Stats collected for {period} {search_date} ({len(forums_stats)} stats collected)") + + forum_stats_objects = [ + { + "date": stat["date"], + "period": stat["period"], + "forum": forums_dict[stat["forum_id"]], + "visits": stat["visits"], + "entry_visits": stat["entry_visits"], + "time_spent": stat["time_spent"], + } + for stat in forums_stats + ] + ForumStat.objects.bulk_create([ForumStat(**stat) for stat in forum_stats_objects]) + + search_date += 
relativedelta(days=7) diff --git a/lacommunaute/utils/tests/tests_utils.py b/lacommunaute/utils/tests/tests_utils.py index 27d6ac46c..ab56e459b 100644 --- a/lacommunaute/utils/tests/tests_utils.py +++ b/lacommunaute/utils/tests/tests_utils.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import date as datetime_date, datetime, timedelta from unittest.mock import patch import pytest @@ -17,13 +17,21 @@ from machina.core.db.models import get_model from machina.core.loading import get_class -from lacommunaute.forum.factories import ForumFactory +from lacommunaute.forum.factories import CategoryForumFactory, ForumFactory from lacommunaute.forum_conversation.factories import TopicFactory from lacommunaute.forum_conversation.forum_attachments.factories import AttachmentFactory from lacommunaute.forum_file.models import PublicFile +from lacommunaute.stats.models import ForumStat from lacommunaute.users.factories import UserFactory +from lacommunaute.utils.date import get_last_sunday from lacommunaute.utils.math import percent -from lacommunaute.utils.matomo import get_matomo_data, get_matomo_events_data, get_matomo_visits_data +from lacommunaute.utils.matomo import ( + collect_forum_stats_from_matomo_api, + get_matomo_data, + get_matomo_events_data, + get_matomo_forums_data, + get_matomo_visits_data, +) from lacommunaute.utils.perms import add_public_perms_on_forum from lacommunaute.utils.testing import parse_response_to_soup from lacommunaute.utils.urls import urlize @@ -428,6 +436,145 @@ def test_get_matomo_events_data_with_label(self): ) +@pytest.fixture(name="get_matomo_forums_data_response") +def fixture_get_matomo_forums_data_response(): + return [ + { + "label": "forum", + "subtable": [ + {"label": "forum-1", "nb_visits": 10, "entry_nb_visits": 100, "sum_time_spent": 1000}, + {"label": "forum-2", "nb_visits": 20, "entry_nb_visits": 200, "sum_time_spent": 2000}, + {"label": "foruX-2", "nb_visits": 21, "entry_nb_visits": 201, 
"sum_time_spent": 2001}, + {"label": "forum-4", "nb_visits": 14, "entry_nb_visits": 104, "sum_time_spent": 1004}, + ], + }, + { + "label": "home", + "subtable": [ + {"label": "forum-3", "nb_visits": 30, "entry_nb_visits": 300, "sum_time_spent": 3000}, + ], + }, + ] + + +class TestGetMatomoForumsData: + def test_label_is_none(self): + with pytest.raises(ValueError) as value_error: + get_matomo_forums_data("week", datetime_date(2024, 5, 6), None) + + assert str(value_error.value) == "label must be provided" + + def test_no_ids(self, get_matomo_forums_data_response): + with patch("lacommunaute.utils.matomo.get_matomo_data") as mock_get_matomo_data: + mock_get_matomo_data.return_value = get_matomo_forums_data_response + assert get_matomo_forums_data("week", datetime_date(2024, 5, 6), "forum") == [] + + def test_with_ids(self, get_matomo_forums_data_response): + with patch("lacommunaute.utils.matomo.get_matomo_data") as mock_get_matomo_data: + mock_get_matomo_data.return_value = get_matomo_forums_data_response + assert get_matomo_forums_data("week", datetime_date(2024, 5, 6), "forum", ids=[1, 4]) == [ + { + "forum_id": 1, + "date": "2024-05-06", + "period": "week", + "visits": 10, + "entry_visits": 100, + "time_spent": 1000, + }, + { + "forum_id": 4, + "date": "2024-05-06", + "period": "week", + "visits": 14, + "entry_visits": 104, + "time_spent": 1004, + }, + ] + + def test_deduplication(self, get_matomo_forums_data_response): + with patch("lacommunaute.utils.matomo.get_matomo_data") as mock_get_matomo_data: + mock_get_matomo_data.return_value = get_matomo_forums_data_response + assert get_matomo_forums_data("week", datetime_date(2024, 5, 6), "forum", ids=[2]) == [ + { + "forum_id": 2, + "date": "2024-05-06", + "period": "week", + "visits": 41, + "entry_visits": 401, + "time_spent": 4001, + }, + ] + + +class TestCollectForumStatsFromMatomoApi: + def test_unsupported_period(self): + with pytest.raises(ValueError) as value_error: + 
collect_forum_stats_from_matomo_api("unsupported", datetime_date(2024, 5, 6), datetime_date(2024, 5, 13)) + + assert str(value_error.value) == "Only 'week' period is supported for forum stats collection." + + def test_collect_forum_stats_from_matomo_api(self, db): + forum_1 = ForumFactory() + forum_2 = ForumFactory() + catergory_forum = CategoryForumFactory(with_child=True) + child_category_forum = catergory_forum.children.first() + + nb_visits_faker_1 = faker.random_int() + entry_nb_visits_faker_1 = faker.random_int() + sum_time_spent_faker_1 = faker.random_int() + nb_visits_faker_2 = faker.random_int() + entry_nb_visits_faker_2 = faker.random_int() + sum_time_spent_faker_2 = faker.random_int() + + matomo_response = [ + { + "label": "forum", + "subtable": [ + {"label": f"forum-{forum_2.pk}", "nb_visits": 3, "entry_nb_visits": 4, "sum_time_spent": 200}, + { + "label": f"forum-{catergory_forum.pk}", + "nb_visits": nb_visits_faker_1, + "entry_nb_visits": entry_nb_visits_faker_1, + "sum_time_spent": sum_time_spent_faker_1, + }, + { + "label": f"forum-{child_category_forum.pk}", + "nb_visits": nb_visits_faker_2, + "entry_nb_visits": entry_nb_visits_faker_2, + "sum_time_spent": sum_time_spent_faker_2, + }, + ], + }, + ] + + with patch("lacommunaute.utils.matomo.get_matomo_data") as mock_get_matomo_data: + mock_get_matomo_data.return_value = matomo_response + collect_forum_stats_from_matomo_api( + period="week", from_date=datetime_date(2024, 5, 6), to_date=datetime_date(2024, 5, 13) + ) + + assert ForumStat.objects.count() == 4 + assert ForumStat.objects.filter(forum__in=[forum_1, forum_2]).count() == 0 + category_forum_20240506 = ForumStat.objects.get( + forum=catergory_forum, date=datetime(2024, 5, 6), period="week" + ) + assert category_forum_20240506.visits == nb_visits_faker_1 + assert category_forum_20240506.entry_visits == entry_nb_visits_faker_1 + assert category_forum_20240506.time_spent == sum_time_spent_faker_1 + child_category_forum_20240506 = 
ForumStat.objects.get( + forum=child_category_forum, date=datetime(2024, 5, 6), period="week" + ) + assert child_category_forum_20240506.visits == nb_visits_faker_2 + assert child_category_forum_20240506.entry_visits == entry_nb_visits_faker_2 + assert child_category_forum_20240506.time_spent == sum_time_spent_faker_2 + assert ( + ForumStat.objects.filter( + forum__in=[catergory_forum, child_category_forum], date=datetime(2024, 5, 13), period="week" + ).count() + == 2 + ) + + class UtilsMathPercent(TestCase): def test_percent(self): self.assertEqual(percent(2, 1), 200) @@ -535,3 +682,12 @@ def test_size_validator(self, db): with pytest.raises(Exception): file.file.size = 1024 * 1024 * 5 + 1 file.save() + + +class TestTheLastSunday: + @pytest.mark.parametrize( + "day, expected_sunday", + [(i, datetime(2024, 5, 12)) for i in range(12, 19)] + [(i, datetime(2024, 5, 19)) for i in range(19, 26)], + ) + def test_the_last_sunday(self, day, expected_sunday): + assert get_last_sunday(datetime(2024, 5, day)) == expected_sunday