From 2479c1a64c4baac47fc4da821ba5e12204a55834 Mon Sep 17 00:00:00 2001 From: Mark Liffiton Date: Wed, 4 Dec 2024 23:56:18 -0600 Subject: [PATCH] Centralize privacy/deletion handler registration and verification. --- src/codehelp/__init__.py | 4 +- src/codehelp/deletion_handler.py | 117 ++++++++++++++++++++++++++++++ src/codehelp/privacy.py | 115 ----------------------------- src/gened/base.py | 7 +- src/gened/data_deletion.py | 51 +++++++++++++ src/gened/instructor.py | 28 +------ src/gened/profile.py | 13 +--- src/starburst/__init__.py | 4 +- src/starburst/deletion_handler.py | 41 +++++++++++ src/starburst/privacy.py | 26 ------- 10 files changed, 224 insertions(+), 182 deletions(-) create mode 100644 src/codehelp/deletion_handler.py delete mode 100644 src/codehelp/privacy.py create mode 100644 src/gened/data_deletion.py create mode 100644 src/starburst/deletion_handler.py delete mode 100644 src/starburst/privacy.py diff --git a/src/codehelp/__init__.py b/src/codehelp/__init__.py index e0b3f14..85a91c7 100644 --- a/src/codehelp/__init__.py +++ b/src/codehelp/__init__.py @@ -9,7 +9,7 @@ from gened import base -from . import admin, context_config, helper, privacy, tutor +from . 
import admin, context_config, deletion_handler, helper, tutor def create_app(test_config: dict[str, Any] | None = None, instance_path: Path | None = None) -> Flask: @@ -42,7 +42,7 @@ def create_app(test_config: dict[str, Any] | None = None, instance_path: Path | app_config = app_config | test_config # register app-specific functionality with gened - privacy.register_with_gened() + deletion_handler.register_with_gened() admin.register_with_gened() # create the base application diff --git a/src/codehelp/deletion_handler.py b/src/codehelp/deletion_handler.py new file mode 100644 index 0000000..01da0f8 --- /dev/null +++ b/src/codehelp/deletion_handler.py @@ -0,0 +1,117 @@ +# SPDX-FileCopyrightText: 2024 Mark Liffiton +# +# SPDX-License-Identifier: AGPL-3.0-only + +"""Implementation of personal data deletion for CodeHelp.""" + +from gened.data_deletion import register_handler +from gened.db import get_db + + +class CodeHelpDeletionHandler: + """CodeHelp implementation of personal data deletion.""" + + def delete_user_data(self, user_id: int) -> None: + """Delete/Anonymize personal data for a user while preserving non-personal data for analysis.""" + db = get_db() + + # Anonymize personal data in queries + db.execute(""" + UPDATE queries + SET code = CASE + WHEN code IS NOT NULL THEN '[deleted]' + ELSE NULL + END, + error = CASE + WHEN error IS NOT NULL THEN '[deleted]' + ELSE NULL + END, + issue = '[deleted]', + context_name = '[deleted]', + context_string_id = NULL, + user_id = -1 + WHERE user_id = ? + """, [user_id]) + + # Anonymize personal data in chats + db.execute(""" + UPDATE chats + SET topic = '[deleted]', + chat_json = '[]', + context_name = '[deleted]', + context_string_id = NULL, + user_id = -1 + WHERE user_id = ? 
+ """, [user_id]) + + db.commit() + + def delete_class_data(self, class_id: int) -> None: + """Delete/Anonymize personal data for a class while preserving non-personal data for analysis.""" + db = get_db() + + # Remove context names and configs as they may contain personal information + db.execute(""" + UPDATE contexts + SET name = '[deleted]' || id, + config = '{}' + WHERE class_id = ? + """, [class_id]) + + # Remove context strings as they may contain personal information + db.execute(""" + DELETE FROM context_strings + WHERE id IN ( + SELECT context_string_id + FROM queries + WHERE role_id IN ( + SELECT id FROM roles WHERE class_id = ? + ) + UNION + SELECT context_string_id + FROM chats + WHERE role_id IN ( + SELECT id FROM roles WHERE class_id = ? + ) + ) + """, [class_id, class_id]) + + # Anonymize personal data in queries + db.execute(""" + UPDATE queries + SET code = CASE + WHEN code IS NOT NULL THEN '[deleted]' + ELSE NULL + END, + error = CASE + WHEN error IS NOT NULL THEN '[deleted]' + ELSE NULL + END, + issue = '[deleted]', + context_name = '[deleted]', + context_string_id = NULL, + user_id = -1 + WHERE role_id IN ( + SELECT id FROM roles WHERE class_id = ? + ) + """, [class_id]) + + # Anonymize personal data in chats + db.execute(""" + UPDATE chats + SET topic = '[deleted]', + chat_json = '[]', + context_name = '[deleted]', + context_string_id = NULL, + user_id = -1 + WHERE role_id IN ( + SELECT id FROM roles WHERE class_id = ? 
+ ) + """, [class_id]) + + db.commit() + + +def register_with_gened() -> None: + """Register CodeHelp deletion handler with the gened framework.""" + register_handler(CodeHelpDeletionHandler()) diff --git a/src/codehelp/privacy.py b/src/codehelp/privacy.py deleted file mode 100644 index 538b9bb..0000000 --- a/src/codehelp/privacy.py +++ /dev/null @@ -1,115 +0,0 @@ -# SPDX-FileCopyrightText: 2024 Mark Liffiton -# -# SPDX-License-Identifier: AGPL-3.0-only - -from gened.db import get_db -from gened.instructor import register_class_deletion_handler -from gened.profile import register_user_deletion_handler - - -def delete_user_data(user_id: int) -> None: - """Delete/Anonymize personal data for a user while preserving non-personal data for analysis.""" - db = get_db() - - # Anonymize personal data in queries - db.execute(""" - UPDATE queries - SET code = CASE - WHEN code IS NOT NULL THEN '[deleted]' - ELSE NULL - END, - error = CASE - WHEN error IS NOT NULL THEN '[deleted]' - ELSE NULL - END, - issue = '[deleted]', - context_name = '[deleted]', - context_string_id = NULL, - user_id = -1 - WHERE user_id = ? - """, [user_id]) - - # Anonymize personal data in chats - db.execute(""" - UPDATE chats - SET topic = '[deleted]', - chat_json = '[]', - context_name = '[deleted]', - context_string_id = NULL, - user_id = -1 - WHERE user_id = ? - """, [user_id]) - - db.commit() - - -def delete_class_data(class_id: int) -> None: - """Delete/Anonymize personal data for a class while preserving non-personal data for analysis.""" - db = get_db() - - # Remove context names and configs as they may contain personal information - db.execute(""" - UPDATE contexts - SET name = '[deleted]' || id, - config = '{}' - WHERE class_id = ? - """, [class_id]) - - # Remove context strings as they may contain personal information - db.execute(""" - DELETE FROM context_strings - WHERE id IN ( - SELECT context_string_id - FROM queries - WHERE role_id IN ( - SELECT id FROM roles WHERE class_id = ? 
- ) - UNION - SELECT context_string_id - FROM chats - WHERE role_id IN ( - SELECT id FROM roles WHERE class_id = ? - ) - ) - """, [class_id, class_id]) - - # Anonymize personal data in queries - db.execute(""" - UPDATE queries - SET code = CASE - WHEN code IS NOT NULL THEN '[deleted]' - ELSE NULL - END, - error = CASE - WHEN error IS NOT NULL THEN '[deleted]' - ELSE NULL - END, - issue = '[deleted]', - context_name = '[deleted]', - context_string_id = NULL, - user_id = -1 - WHERE role_id IN ( - SELECT id FROM roles WHERE class_id = ? - ) - """, [class_id]) - - # Anonymize personal data in chats - db.execute(""" - UPDATE chats - SET topic = '[deleted]', - chat_json = '[]', - context_name = '[deleted]', - context_string_id = NULL, - user_id = -1 - WHERE role_id IN ( - SELECT id FROM roles WHERE class_id = ? - ) - """, [class_id]) - - db.commit() - - -def register_with_gened() -> None: - """ Register privacy functionality with the main gened module.""" - register_class_deletion_handler(delete_class_data) - register_user_deletion_handler(delete_user_data) diff --git a/src/gened/base.py b/src/gened/base.py index d0db1b0..aec6bae 100644 --- a/src/gened/base.py +++ b/src/gened/base.py @@ -20,6 +20,7 @@ auth, class_config, classes, + data_deletion, db, demo, docs, @@ -164,10 +165,14 @@ def create_app_base(import_name: str, app_config: dict[str, Any], instance_path: # configure the application app.config.from_mapping(total_config) + # verify deletion handler is registered + if data_deletion.get_handler() is None: + app.logger.error("No deletion handler registered. 
All Gen-Ed applications must provide one.") + sys.exit(1) + admin.init_app(app) db.init_app(app) filters.init_app(app) - instructor.init_app(app) # This will verify data deletion handler is registered migrate.init_app(app) oauth.init_app(app) tz.init_app(app) diff --git a/src/gened/data_deletion.py b/src/gened/data_deletion.py new file mode 100644 index 0000000..5fdccc1 --- /dev/null +++ b/src/gened/data_deletion.py @@ -0,0 +1,51 @@ +# SPDX-FileCopyrightText: 2024 Mark Liffiton +# +# SPDX-License-Identifier: AGPL-3.0-only + +"""Interface and registry for personal data deletion handlers. + +This module defines the interface that Gen-Ed applications must implement +for handling personal data deletion and provides the registration mechanism +for those handlers. +""" + +from typing import Protocol + + +class DeletionHandler(Protocol): + """Protocol defining the interface for personal data deletion handlers.""" + def delete_user_data(self, user_id: int) -> None: + """Delete/anonymize all personal data for the given user.""" + ... + + def delete_class_data(self, class_id: int) -> None: + """Delete/anonymize all personal data for the given class.""" + ... 
+ + +_handler: DeletionHandler | None = None + + +def register_handler(handler: DeletionHandler) -> None: + """Register the application's deletion handler implementation.""" + global _handler + _handler = handler + + +def get_handler() -> DeletionHandler | None: + """Get the registered deletion handler.""" + return _handler + + +def delete_user_data(user_id: int) -> None: + """Delete/anonymize all personal data for the given user.""" + if _handler is None: + raise RuntimeError("No deletion handler registered") + _handler.delete_user_data(user_id) + + +def delete_class_data(class_id: int) -> None: + """Delete/anonymize all personal data for the given class.""" + if _handler is None: + raise RuntimeError("No deletion handler registered") + _handler.delete_class_data(class_id) diff --git a/src/gened/instructor.py b/src/gened/instructor.py index ec72e68..fb507b8 100644 --- a/src/gened/instructor.py +++ b/src/gened/instructor.py @@ -14,13 +14,10 @@ """ import datetime as dt -import sys -from collections.abc import Callable from sqlite3 import Row from flask import ( Blueprint, - Flask, abort, flash, redirect, @@ -30,30 +27,13 @@ ) from werkzeug.wrappers.response import Response -from .auth import get_auth, get_auth_class, instructor_required +from .auth import get_auth_class, instructor_required from .classes import switch_class from .csv import csv_response +from .data_deletion import delete_class_data from .db import get_db from .redir import safe_redirect -DataDeletionHandler = Callable[[int], None] - -# Register the handler for the current application -_deletion_handlers: list[DataDeletionHandler] = [] - - -def register_class_deletion_handler(handler: DataDeletionHandler) -> None: - """Register the application's class deletion implementation""" - _deletion_handlers.append(handler) - - -def init_app(app: Flask) -> None: - """Initialize the instructor module and verify deletion handler is registered""" - if not _deletion_handlers: - app.logger.error("No data deletion 
handler registered. All Gen-Ed applications must provide one.") - sys.exit(1) - - bp = Blueprint('instructor', __name__, url_prefix="/instructor", template_folder='templates') @bp.before_request @@ -249,9 +229,7 @@ def delete_class() -> Response: assert str(class_id) == str(request.form.get('class_id')) # Call application-specific data deletion handler(s) - assert _deletion_handlers # checked during init - for handler in _deletion_handlers: - handler(class_id) + delete_class_data(class_id) # Deactivate all roles and disable the class db.execute("UPDATE roles SET user_id=-1, active = 0 WHERE class_id = ?", [class_id]) diff --git a/src/gened/profile.py b/src/gened/profile.py index 1555ca5..f5d53b3 100644 --- a/src/gened/profile.py +++ b/src/gened/profile.py @@ -2,22 +2,14 @@ # # SPDX-License-Identifier: AGPL-3.0-only -from collections.abc import Callable from flask import Blueprint, flash, redirect, render_template, request, session, url_for from werkzeug.wrappers.response import Response from .auth import get_auth, login_required +from .data_deletion import delete_user_data from .db import get_db from .redir import safe_redirect -# Register the handler for the current application -_deletion_handlers: list[Callable[[int], None]] = [] - - -def register_user_deletion_handler(handler: Callable[[int], None]) -> None: - """Register the application's user deletion implementation""" - _deletion_handlers.append(handler) - bp = Blueprint('profile', __name__, url_prefix="/profile", template_folder='templates') @@ -85,8 +77,7 @@ def delete_data() -> Response: assert user_id is not None # due to @login_required # Call application-specific data deletion handler(s) - for handler in _deletion_handlers: - handler(user_id) + delete_user_data(user_id) # Deactivate all roles db.execute("UPDATE roles SET user_id = -1, active = 0 WHERE user_id = ?", [user_id]) diff --git a/src/starburst/__init__.py b/src/starburst/__init__.py index 92004a5..deb6fbf 100644 --- a/src/starburst/__init__.py 
+++ b/src/starburst/__init__.py @@ -8,7 +8,7 @@ from gened import base -from . import helper, privacy +from . import helper, deletion_handler def create_app(test_config: dict[str, str] | None = None, instance_path: Path | None = None) -> Flask: @@ -30,7 +30,7 @@ def create_app(test_config: dict[str, str] | None = None, instance_path: Path | app_config = app_config | test_config # register app-specific functionality with gened - privacy.register_with_gened() + deletion_handler.register_with_gened() # create the base application app = base.create_app_base(__name__, app_config, instance_path) diff --git a/src/starburst/deletion_handler.py b/src/starburst/deletion_handler.py new file mode 100644 index 0000000..1f667b2 --- /dev/null +++ b/src/starburst/deletion_handler.py @@ -0,0 +1,41 @@ +# SPDX-FileCopyrightText: 2024 Mark Liffiton +# +# SPDX-License-Identifier: AGPL-3.0-only + +from gened.data_deletion import register_handler +from gened.db import get_db + + +class StarburstDeletionHandler: + """Handler for deleting Starburst user data.""" + + def delete_user_data(self, user_id: int) -> None: + """Delete/Anonymize personal data for a user while preserving non-personal data for analysis.""" + db = get_db() + + # Delete queries + db.execute(""" + DELETE FROM queries + WHERE user_id = ? + """, [user_id]) + + db.commit() + + def delete_class_data(self, class_id: int) -> None: + """Delete/Anonymize personal data for a class while preserving non-personal data for analysis.""" + db = get_db() + + # Delete queries + db.execute(""" + DELETE FROM queries + WHERE role_id IN ( + SELECT id FROM roles WHERE class_id = ? 
+ ) + """, [class_id]) + + db.commit() + + +def register_with_gened() -> None: + """Register Starburst deletion handler with the gened framework.""" + register_handler(StarburstDeletionHandler()) diff --git a/src/starburst/privacy.py b/src/starburst/privacy.py deleted file mode 100644 index 4e5cb49..0000000 --- a/src/starburst/privacy.py +++ /dev/null @@ -1,26 +0,0 @@ -# SPDX-FileCopyrightText: 2024 Mark Liffiton -# -# SPDX-License-Identifier: AGPL-3.0-only - -from gened.db import get_db -from gened.instructor import register_class_deletion_handler - - -def delete_class_data(class_id: int) -> None: - """Delete/Anonymize personal data for a class while preserving non-personal data for analysis.""" - db = get_db() - - # Delete queries - db.execute(""" - DELETE FROM queries - WHERE role_id IN ( - SELECT id FROM roles WHERE class_id = ? - ) - """, [class_id]) - - db.commit() - - -def register_with_gened() -> None: - """ Register privacy functionality with the main gened module.""" - register_class_deletion_handler(delete_class_data)