Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Conversations: anonymize data #1618

Merged
merged 7 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/bin/bash -l

# delete outdated conversations
# anonymize outdated conversations

# Do not run if this env var is not set:
if [[ -z "$CRON_CONVERSATIONS_DELETE_OUTDATED" ]]; then
echo "CRON_CONVERSATIONS_DELETE_OUTDATED not set. Exiting..."
if [[ -z "$CRON_CONVERSATIONS_ANONYMIZE_OUTDATED" ]]; then
echo "CRON_CONVERSATIONS_ANONYMIZE_OUTDATED not set. Exiting..."
exit 0
fi

Expand All @@ -19,4 +19,4 @@ fi
# $APP_HOME is set by default by clever cloud.
cd $APP_HOME

django-admin delete_outdated_conversations
django-admin anonymize_outdated_conversations
2 changes: 1 addition & 1 deletion clevercloud/cron.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"15 0 * * * $ROOT/clevercloud/stats_export_user_download_list_to_file.sh",
"30 0 * * * $ROOT/clevercloud/stats_export_user_search_list_to_file.sh",
"0 1 * * * $ROOT/clevercloud/tenders_update_count_fields.sh",
"0 6 * * * $ROOT/clevercloud/conversations_delete_outdated.sh",
"0 6 * * * $ROOT/clevercloud/conversations_anonymize_outdated.sh",
"0 7 * * 1 $ROOT/clevercloud/siaes_sync_with_emplois_inclusion.sh",
"5 7 * * 1 $ROOT/clevercloud/siaes_sync_c2_c4.sh",
"10 7 * * 1 $ROOT/clevercloud/siaes_update_api_entreprise_fields.sh",
Expand Down
3 changes: 3 additions & 0 deletions config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,9 @@
}
BITOUBI_ENV_COLOR = ENV_COLOR_MAPPING.get(BITOUBI_ENV, "")

# Privacy timeouts
# ------------------------------------------------------------------------------
# Age, in months, after which a conversation is picked up by the
# `anonymize_outdated_conversations` management command. Read from the
# environment variable of the same name; defaults to 6 when it is not set.
INACTIVE_CONVERSATION_TIMEOUT_IN_MONTHS = env.int("INACTIVE_CONVERSATION_TIMEOUT_IN_MONTHS", 6)

# Wagtail
# ------------------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions lemarche/conversations/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Keys of an inbound email payload. Judging by the name, these are the ones
# persisted with a conversation — TODO confirm against the inbound email handler.
ATTRIBUTES_TO_SAVE_FOR_INBOUND = ["From", "To", "CC", "ReplyTo", "SentAtDate", "Attachments"]
# Keys that survive anonymization: `clean_inbound_data` keeps only these and
# drops every other key of the payload.
ATTRIBUTES_TO_NOT_ANONYMIZE_FOR_INBOUND = ["SentAtDate", "Attachments"]

SOURCE_MAILJET = "MAILJET"
SOURCE_BREVO = "BREVO"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from dateutil.relativedelta import relativedelta
from django.conf import settings
from django.utils import timezone

from lemarche.conversations.constants import ATTRIBUTES_TO_NOT_ANONYMIZE_FOR_INBOUND
from lemarche.conversations.models import Conversation
from lemarche.utils.commands import BaseCommand


def clean_inbound_data(inbound_data: dict) -> dict:
    """Return a copy of an inbound email payload stripped of personal data.

    Only the keys listed in ``ATTRIBUTES_TO_NOT_ANONYMIZE_FOR_INBOUND`` are
    kept; every other key (sender, recipients, ...) is dropped.

    Args:
        inbound_data: one inbound email payload, i.e. one item of
            ``Conversation.data``.

    Returns:
        A new dict holding the allowed keys that are present in
        ``inbound_data``. The input dict is left untouched.
    """
    # Skip allowed keys that are absent instead of raising KeyError: a single
    # incomplete payload must not abort the anonymization of the whole batch.
    return {key: inbound_data[key] for key in ATTRIBUTES_TO_NOT_ANONYMIZE_FOR_INBOUND if key in inbound_data}


class Command(BaseCommand):
    """
    Anonymize the conversations that are older than the privacy timeout.

    A conversation is outdated once it was created more than
    ``settings.INACTIVE_CONVERSATION_TIMEOUT_IN_MONTHS`` months ago.

    Note: run via a CRON every day
    Usage: python manage.py anonymize_outdated_conversations
    """

    def handle(self, *args, **options):
        """Strip personal data from every outdated, not yet anonymized conversation."""
        cutoff = timezone.now() - relativedelta(months=settings.INACTIVE_CONVERSATION_TIMEOUT_IN_MONTHS)

        # Evaluate the queryset once: the very same instances are mutated
        # below and then handed over to bulk_update().
        to_anonymize = list(Conversation.objects.filter(is_anonymized=False, created_at__lte=cutoff))

        for conv in to_anonymize:
            # Sender identity
            conv.sender_user = None
            conv.sender_email = None
            conv.sender_first_name = ""
            conv.sender_last_name = ""
            # Keep only the length of the original message, as a string
            body_length = len(conv.initial_body_message)
            conv.initial_body_message = str(body_length)
            # Inbound payloads: keep only the attributes allowed after anonymization
            conv.data = [clean_inbound_data(payload) for payload in conv.data]
            conv.is_anonymized = True

        updated_fields = [
            "sender_user",
            "sender_email",
            "sender_first_name",
            "sender_last_name",
            "initial_body_message",
            "data",
            "is_anonymized",
        ]
        Conversation.objects.bulk_update(to_anonymize, fields=updated_fields)

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by Django 4.2.15 on 2024-12-30 12:33

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``is_anonymized`` boolean flag (default ``False``) to the ``conversation`` model."""

    dependencies = [("conversations", "0017_emailgroup_disabledemail_templatetransactional_group_and_more")]

    operations = [
        migrations.AddField(
            model_name="conversation",
            name="is_anonymized",
            field=models.BooleanField(verbose_name="Est anonymisé", default=False),
        ),
    ]
11 changes: 2 additions & 9 deletions lemarche/conversations/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from datetime import timedelta
from uuid import uuid4

from django.conf import settings
Expand Down Expand Up @@ -40,14 +39,6 @@ def get_conv_from_uuid(self, conv_uuid: str, version=1):
else:
return self.get(Q(sender_encoded__endswith=conv_uuid) | Q(siae_encoded__endswith=conv_uuid))

def outdated(self):
    """Return the conversations that have reached the six-month retention limit.

    Conversations must be deleted after six months; this selects the ones
    whose creation date is at least that old.
    """
    # timedelta has no notion of months, so six months is approximated
    # as 6 x 30 days.
    retention = timedelta(days=30 * 6)
    cutoff = timezone.now() - retention
    return self.filter(created_at__lte=cutoff)


class Conversation(models.Model):
KIND_SEARCH = "SEARCH"
Expand Down Expand Up @@ -97,6 +88,8 @@ class Conversation(models.Model):

data = models.JSONField(default=list)

is_anonymized = models.BooleanField(verbose_name="Est anonymisé", default=False)

created_at = models.DateTimeField(verbose_name="Date de création", default=timezone.now)
updated_at = models.DateTimeField(verbose_name="Date de modification", auto_now=True)
validated_at = models.DateTimeField(verbose_name="Date de validation", blank=True, null=True)
Expand Down
48 changes: 39 additions & 9 deletions lemarche/conversations/tests.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from datetime import timedelta
from datetime import datetime
from unittest.mock import patch

from django.core.exceptions import ValidationError
from django.core.management import call_command
from django.db import IntegrityError
from django.test import TestCase, TransactionTestCase
from django.test import TestCase, TransactionTestCase, override_settings
from django.utils import timezone

from lemarche.conversations import constants as conversation_constants
from lemarche.conversations.constants import ATTRIBUTES_TO_NOT_ANONYMIZE_FOR_INBOUND, ATTRIBUTES_TO_SAVE_FOR_INBOUND
from lemarche.conversations.factories import ConversationFactory, TemplateTransactionalFactory
from lemarche.conversations.models import Conversation, TemplateTransactional
from lemarche.siaes.factories import SiaeFactory
Expand Down Expand Up @@ -71,13 +74,40 @@ def test_with_answer_stats(self):
self.assertEqual(conversation_queryset.get(id=self.conversation.id).answer_count_annotated, 0)
self.assertEqual(conversation_queryset.get(id=self.conversation_with_answer.id).answer_count_annotated, 1)

def test_outdated(self):
    """``outdated()`` must only return the conversations that are old enough."""
    # One conversation well past the retention period...
    ConversationFactory(created_at=timezone.now() - timedelta(days=365))
    # ...and one recent conversation that must not be selected.
    ConversationFactory(created_at=timezone.now() - timedelta(weeks=5))

    # Presumably 2 conversations come from the test case set-up (not visible
    # here), plus the 2 created above.
    total = Conversation.objects.all().count()
    self.assertEqual(total, 2 + 2)

    outdated_total = Conversation.objects.outdated().count()
    self.assertEqual(outdated_total, 1)

# NOTE: `django.utils.timezone.utc` is deprecated since Django 4.1 and removed in
# Django 5.0, so UTC datetimes are built with the standard library instead
# (`fromisoformat` with a "+00:00" offset yields a UTC-aware datetime).
@patch("django.utils.timezone.now", lambda: datetime.fromisoformat("2024-01-01T00:00:00+00:00"))
@override_settings(
    INACTIVE_CONVERSATION_TIMEOUT_IN_MONTHS=6,
)
class ConversationAnonymizationTestCase(TestCase):
    """
    Check that conversations are correctly anonymized.

    The clock is frozen on 2024-01-01 (UTC) and the timeout is forced to 6 months,
    so only conversations created on or before 2023-07-01 are outdated.
    """

    def setUp(self):
        """Create one outdated conversation and one that is still active."""
        inbound_data = {key: "something" for key in ATTRIBUTES_TO_SAVE_FOR_INBOUND}
        # Expected payload once the personal attributes have been removed
        self.anonymized_inbound_data = {key: "something" for key in ATTRIBUTES_TO_NOT_ANONYMIZE_FOR_INBOUND}

        # Older than the timeout: must be anonymized
        ConversationFactory(
            title="anonymized",
            created_at=datetime.fromisoformat("2023-06-01T00:00:00+00:00"),
            initial_body_message="blabla",
            data=[inbound_data, inbound_data],
        )
        # Younger than the timeout: must be left untouched
        ConversationFactory(created_at=datetime.fromisoformat("2023-08-01T00:00:00+00:00"))

    def test_anonymize_command(self):
        """The command anonymizes the outdated conversation only."""
        call_command("anonymize_outdated_conversations")

        conv_anonymized = Conversation.objects.get(title="anonymized", is_anonymized=True)
        self.assertIsNone(conv_anonymized.sender_user)
        self.assertIsNone(conv_anonymized.sender_email)
        self.assertEqual(conv_anonymized.sender_first_name, "")
        self.assertEqual(conv_anonymized.sender_last_name, "")
        # The body is replaced by its length: len("blabla") == 6
        self.assertEqual(conv_anonymized.initial_body_message, "6")
        self.assertEqual(conv_anonymized.data, [self.anonymized_inbound_data, self.anonymized_inbound_data])

        # Count instead of asserting on the truthiness of `.get()`: a model
        # instance is always truthy, so the failure message could never be shown.
        self.assertEqual(
            Conversation.objects.filter(is_anonymized=False).count(),
            1,
            msg="active conversation wrongly anonymised !!",
        )


class TemplateTransactionalModelTest(TestCase):
Expand Down
Loading