From 2cf765ea583af515802da2dc6127ed40c0703741 Mon Sep 17 00:00:00 2001
From: Chris van Run <chris.vanrun@radboudumc.nl>
Date: Wed, 18 Dec 2024 16:54:38 +0100
Subject: [PATCH 01/12] Ensure optional postprocessors are ran before the
 default postprocessors

---
 app/config/settings.py                        | 26 ++++++++++++++++---
 .../core/templatetags/bleach.py               |  2 +-
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/app/config/settings.py b/app/config/settings.py
index 8e06ba64a0..e6621d52e4 100644
--- a/app/config/settings.py
+++ b/app/config/settings.py
@@ -22,7 +22,7 @@
 from config.denylist import USERNAME_DENYLIST
 from grandchallenge.components.exceptions import PriorStepFailed
 from grandchallenge.core.utils import strtobool
-from grandchallenge.core.utils.markdown import BS4Extension
+from grandchallenge.core.utils.markdown import ExtendTagClasses
 
 MEGABYTE = 1024 * 1024
 GIGABYTE = 1024 * MEGABYTE
@@ -789,13 +789,31 @@ def get_private_ip():
     "pymdownx.magiclink",
     "pymdownx.tasklist",
     "pymdownx.tilde",
-    BS4Extension(),
 ]
-MARKDOWN_POST_PROCESSORS = []
+
+MARKDOWN_POST_PROCESSORS = [
+    ExtendTagClasses(
+        {
+            "img": ["img-fluid"],
+            "blockquote": ["blockquote"],
+            "table": [
+                "table",
+                "table-hover",
+                "table-borderless",
+            ],
+            "thead": ["thead-light"],
+            "code": ["codehilite"],
+        }
+    )
+]
 MARKDOWNX_MARKDOWNIFY_FUNCTION = (
     "grandchallenge.core.templatetags.bleach.md2html"
 )
-MARKDOWNX_MARKDOWN_EXTENSION_CONFIGS = {}
+MARKDOWNX_MARKDOWN_EXTENSION_CONFIGS = {
+    "markdown.extensions.codehilite": {
+        "wrapcode": False,
+    }
+}
 MARKDOWNX_IMAGE_MAX_SIZE = {"size": (2000, 0), "quality": 90}
 MARKDOWNX_EDITOR_RESIZABLE = "False"
 
diff --git a/app/grandchallenge/core/templatetags/bleach.py b/app/grandchallenge/core/templatetags/bleach.py
index a0c9386e14..2bf85bfc4e 100644
--- a/app/grandchallenge/core/templatetags/bleach.py
+++ b/app/grandchallenge/core/templatetags/bleach.py
@@ -87,7 +87,7 @@ def md2html(
     post_processors = [*settings.MARKDOWN_POST_PROCESSORS]
 
     if process_youtube_tags:
-        post_processors.append(YOUTUBE_TAG_SUBSTITUTION)
+        post_processors = [YOUTUBE_TAG_SUBSTITUTION, *post_processors]
 
     for processor in post_processors:
         cleaned_html = processor(cleaned_html)

From b51031250e947595748ab23be6f8e9a2feacb40f Mon Sep 17 00:00:00 2001
From: Chris van Run <chris.vanrun@radboudumc.nl>
Date: Wed, 18 Dec 2024 16:55:13 +0100
Subject: [PATCH 02/12] Add new ExtendTagClasses post processor

---
 app/grandchallenge/core/utils/markdown.py | 67 +++++++----------------
 1 file changed, 20 insertions(+), 47 deletions(-)

diff --git a/app/grandchallenge/core/utils/markdown.py b/app/grandchallenge/core/utils/markdown.py
index 98b9d94176..c963831486 100644
--- a/app/grandchallenge/core/utils/markdown.py
+++ b/app/grandchallenge/core/utils/markdown.py
@@ -1,61 +1,34 @@
-from xml.etree import ElementTree
-
-from bs4 import BeautifulSoup, Tag
+from bs4 import BeautifulSoup
+from django.utils.html import escape
+from django.utils.safestring import SafeString, mark_safe
 from markdown import Extension
 from markdown.treeprocessors import Treeprocessor
 
 
-class BS4Extension(Extension):
-    def extendMarkdown(self, md):  # noqa: N802
-        md.registerExtension(self)
-        md.treeprocessors.register(BS4Treeprocessor(md), "bs4_extension", 0)
-
-
-class BS4Treeprocessor(Treeprocessor):
-    def run(self, root):
-        el_class_dict = {
-            "img": "img-fluid",
-            "blockquote": "blockquote",
-            "table": "table table-hover table-borderless",
-            "thead": "thead-light",
-            "code": "codehilite",
-        }
-
-        for el in root.iter():
-            if el.tag in el_class_dict:
-                self.set_css_class(
-                    element=el, class_name=el_class_dict[el.tag]
-                )
-
-        for i, html_block in enumerate(self.md.htmlStash.rawHtmlBlocks):
-            bs4block = BeautifulSoup(html_block, "html.parser")
+class ExtendTagClasses:
+    def __init__(self, tag_classes):
+        self.tag_class_dict = tag_classes
 
-            for tag, tag_class in el_class_dict.items():
-                for el in bs4block.find_all(tag):
-                    self.set_css_class(element=el, class_name=tag_class)
-                    self.md.htmlStash.rawHtmlBlocks[i] = str(bs4block)
+    def __call__(self, html):
+        input_is_safe = isinstance(html, SafeString)
 
-    @staticmethod
-    def set_css_class(*, element, class_name):
-        if isinstance(element, ElementTree.Element):
-            current_class = element.attrib.get("class", "")
+        soup = BeautifulSoup(html, "html.parser")
+        for tag, classes in self.tag_class_dict.items():
 
-            if class_name not in current_class:
-                new_class = f"{current_class} {class_name}".strip()
-                element.set("class", new_class)
+            # Make extensions safe
+            classes = [escape(c).strip() for c in classes]
 
-        elif isinstance(element, Tag):
-            if "class" not in element.attrs:
-                element.attrs["class"] = []
+            # Add extension to the class attribute
+            for element in soup.find_all(tag):
+                current_classes = element.get("class", [])
+                element["class"] = [*current_classes, *classes]
 
-            current_class = element["class"]
+        new_html = str(soup)
 
-            for name in class_name.split(" "):
-                if class_name not in current_class:
-                    current_class.append(name)
+        if input_is_safe:
+            new_html = mark_safe(new_html)
 
-        else:
-            raise TypeError("Unsupported element")
+        return mark_safe(new_html)
 
 
 class LinkBlankTargetExtension(Extension):

From 72d340203f4118cc2da3651be42e73ab1506a2e2 Mon Sep 17 00:00:00 2001
From: Chris van Run <chris.vanrun@radboudumc.nl>
Date: Wed, 18 Dec 2024 16:55:23 +0100
Subject: [PATCH 03/12] First fixes to markdown tests

---
 app/tests/core_tests/test_markdown.py | 120 +++++++++++---------------
 1 file changed, 52 insertions(+), 68 deletions(-)

diff --git a/app/tests/core_tests/test_markdown.py b/app/tests/core_tests/test_markdown.py
index 3dcf485759..460ddef207 100644
--- a/app/tests/core_tests/test_markdown.py
+++ b/app/tests/core_tests/test_markdown.py
@@ -5,61 +5,57 @@
 from markdown import markdown
 
 from grandchallenge.core.templatetags.bleach import md2html
-from grandchallenge.core.utils.markdown import BS4Treeprocessor
 
 
 @pytest.mark.parametrize(
     "markdown_with_html, expected_output",
     (
-        (
-            textwrap.dedent(
-                """
-                ![](test.png)
-
-                > Quote Me
-
-                Markdown | Less | Pretty
-                --- | --- | ---
-                *Still* | `renders` | **nicely**
-                1 | 2 | 3
-
-                ```python
-                def test_function():
-                    pass
-                ```"""
-            ),
-            textwrap.dedent(
-                """\
-                <p><img class="img-fluid" src="test.png"></p>
-                <blockquote class="blockquote">
-                <p>Quote Me</p>
-                </blockquote>
-                <table class="table table-hover table-borderless">
-                <thead class="thead-light">
-                <tr>
-                <th>Markdown</th>
-                <th>Less</th>
-                <th>Pretty</th>
-                </tr>
-                </thead>
-                <tbody>
-                <tr>
-                <td><em>Still</em></td>
-                <td><code class="codehilite">renders</code></td>
-                <td><strong>nicely</strong></td>
-                </tr>
-                <tr>
-                <td>1</td>
-                <td>2</td>
-                <td>3</td>
-                </tr>
-                </tbody>
-                </table>
-                <div class="codehilite"><pre><span></span><code class="codehilite"><span class="k">def</span> <span class="nf">test_function</span><span class="p">():</span>
-                    <span class="k">pass</span>
-                </code></pre></div>"""
-            ),
-        ),
+        # (
+        #     textwrap.dedent(
+        #         """
+        #         ![](test.png)
+        #         > Quote Me
+        #         Markdown | Less | Pretty
+        #         --- | --- | ---
+        #         *Still* | `renders` | **nicely**
+        #         1 | 2 | 3
+        #         ```python
+        #         def test_function():
+        #             pass
+        #         ```"""
+        #     ),
+        #     textwrap.dedent(
+        #         """\
+        #         <p><img class="img-fluid" src="test.png"/></p>
+        #         <blockquote class="blockquote">
+        #         <p>Quote Me</p>
+        #         </blockquote>
+        #         <table class="table table-hover table-borderless">
+        #         <thead class="thead-light">
+        #         <tr>
+        #         <th>Markdown</th>
+        #         <th>Less</th>
+        #         <th>Pretty</th>
+        #         </tr>
+        #         </thead>
+        #         <tbody>
+        #         <tr>
+        #         <td><em>Still</em></td>
+        #         <td><code class="codehilite">renders</code></td>
+        #         <td><strong>nicely</strong></td>
+        #         </tr>
+        #         <tr>
+        #         <td>1</td>
+        #         <td>2</td>
+        #         <td>3</td>
+        #         </tr>
+        #         </tbody>
+        #         </table>
+        #         <div class="codehilite"><pre><span></span><span class="k">def</span> <span class="nf">test_function</span><span class="p">():</span>
+        #             <span class="k">pass</span>
+        #         </pre></div>"""
+        #     ),
+        # ),
         (
             textwrap.dedent(
                 r"""
@@ -123,10 +119,10 @@ def test_function():
             ),
             textwrap.dedent(
                 """\
-                <p><img class="img-fluid" src="test.png"></p>
-                <p><img class="img-fluid" src="test-no-class.png"></p>
-                <p><img class="img-fluid" src="test-empty-class.png"></p>
-                <p><img class="ml-3 img-fluid" src="test-existing-class.png"></p>
+                <p><img class="img-fluid" src="test.png"/></p>
+                <p><img class="img-fluid" src="test-no-class.png"/></p>
+                <p><img class="img-fluid" src="test-empty-class.png"/></p>
+                <p><img class="ml-3 img-fluid" src="test-existing-class.png"/></p>
                 <blockquote class="blockquote">
                 <p>Quote Me</p>
                 </blockquote>
@@ -136,7 +132,6 @@ def test_function():
                 <blockquote class="ml-3 blockquote">
                 <p>Quote Me Existing Class</p>
                 </blockquote>
-
                 <table class="table table-hover table-borderless">
                 <thead class="thead-light">
                 <tr>
@@ -167,7 +162,6 @@ def test_function():
                 <tbody>
                 </tbody>
                 </table>
-
                 <table class="ml-3 table table-hover table-borderless">
                 <thead class="ml-3 thead-light">
                 <tr>
@@ -177,14 +171,11 @@ def test_function():
                 <tbody>
                 </tbody>
                 </table>
-
-                <div class="codehilite"><pre><span></span><code class="codehilite"><span class="k">def</span> <span class="nf">test_function</span><span class="p">():</span>
+                <div class="codehilite"><pre><span></span><span class="k">def</span> <span class="nf">test_function</span><span class="p">():</span>
                     <span class="k">pass</span>
-                </code></pre></div>
-
+                </pre></div>
                 <div><pre><code class="codehilite">no class</code></pre></div>
                 <div class="ml-3"><pre><code class="ml-3 codehilite">existing class</code></pre></div>
-
                 <p><del>Delete me</del></p>
                 <p>CH<sub>3</sub>CH<sub>2</sub>OH
                 text<sub>a subscript</sub></p>
@@ -246,10 +237,3 @@ def test_setting_class_to_html_img_within_markdown(
     )
 
     assert output == expected_output
-
-
-def test_tree_processor_set_css_class_type_error():
-    with pytest.raises(TypeError):
-        BS4Treeprocessor.set_css_class(
-            element="element", class_name="img-fluid"
-        )

From 42d41dcba3a24e2f0731ea1a421798922b5626cd Mon Sep 17 00:00:00 2001
From: Chris van Run <chris.vanrun@radboudumc.nl>
Date: Fri, 20 Dec 2024 15:47:07 +0100
Subject: [PATCH 04/12] Add HTML postprocessor to extend classes

---
 app/config/settings.py                    |   7 +-
 app/grandchallenge/core/utils/markdown.py |  51 ++++--
 app/tests/core_tests/test_markdown.py     | 214 ++++++++++++++--------
 3 files changed, 181 insertions(+), 91 deletions(-)

diff --git a/app/config/settings.py b/app/config/settings.py
index e6621d52e4..7303c51c38 100644
--- a/app/config/settings.py
+++ b/app/config/settings.py
@@ -22,7 +22,7 @@
 from config.denylist import USERNAME_DENYLIST
 from grandchallenge.components.exceptions import PriorStepFailed
 from grandchallenge.core.utils import strtobool
-from grandchallenge.core.utils.markdown import ExtendTagClasses
+from grandchallenge.core.utils.markdown import ExtendHTMLTagClasses
 
 MEGABYTE = 1024 * 1024
 GIGABYTE = 1024 * MEGABYTE
@@ -790,9 +790,8 @@ def get_private_ip():
     "pymdownx.tasklist",
     "pymdownx.tilde",
 ]
-
 MARKDOWN_POST_PROCESSORS = [
-    ExtendTagClasses(
+    ExtendHTMLTagClasses(
         {
             "img": ["img-fluid"],
             "blockquote": ["blockquote"],
@@ -804,7 +803,7 @@ def get_private_ip():
             "thead": ["thead-light"],
             "code": ["codehilite"],
         }
-    )
+    ),
 ]
 MARKDOWNX_MARKDOWNIFY_FUNCTION = (
     "grandchallenge.core.templatetags.bleach.md2html"
diff --git a/app/grandchallenge/core/utils/markdown.py b/app/grandchallenge/core/utils/markdown.py
index c963831486..4427529ef1 100644
--- a/app/grandchallenge/core/utils/markdown.py
+++ b/app/grandchallenge/core/utils/markdown.py
@@ -5,30 +5,53 @@
 from markdown.treeprocessors import Treeprocessor
 
 
-class ExtendTagClasses:
+class BeautifulSoupWithCharEntities(BeautifulSoup):
+    """
+    Soup generator that elegantly handles reserved HTML entity placeholders.
+
+    For instance, the soup HTMLparser replaces these (e.g. '&lt;') into their
+    unicode equivalents (e.g. '<').
+
+    This messes up things if the HTML is decoded into a string again.
+    """
+
+    def __init__(self, /, markup, features="html.parser", **kwargs):
+        markup = markup.replace("&", "&amp;")
+
+        super().__init__(markup=markup, features=features, **kwargs)
+
+    def decode(self, **kwargs):
+        # Prevent entity subsitution (e.g. "&" -> "&amp")
+        kwargs["formatter"] = None
+        return super().decode(**kwargs)
+
+
+class ExtendHTMLTagClasses:
     def __init__(self, tag_classes):
-        self.tag_class_dict = tag_classes
+        # Make extensions safe
+        self.tag_class_dict = {
+            t: [escape(c).strip() for c in classes]
+            for t, classes in tag_classes.items()
+        }
 
     def __call__(self, html):
         input_is_safe = isinstance(html, SafeString)
 
-        soup = BeautifulSoup(html, "html.parser")
-        for tag, classes in self.tag_class_dict.items():
-
-            # Make extensions safe
-            classes = [escape(c).strip() for c in classes]
+        soup = BeautifulSoupWithCharEntities(markup=html)
 
-            # Add extension to the class attribute
-            for element in soup.find_all(tag):
-                current_classes = element.get("class", [])
-                element["class"] = [*current_classes, *classes]
+        for element in soup.find_all(self.tag_class_dict.keys()):
+            classes = element.get("class", [])
+            for new_class in self.tag_class_dict[element.name]:
+                if new_class not in classes:
+                    classes.append(new_class)
+            element["class"] = classes
 
-        new_html = str(soup)
+        new_markup = soup.decode()
 
         if input_is_safe:
-            new_html = mark_safe(new_html)
+            new_markup = mark_safe(new_markup)
 
-        return mark_safe(new_html)
+        return new_markup
 
 
 class LinkBlankTargetExtension(Extension):
diff --git a/app/tests/core_tests/test_markdown.py b/app/tests/core_tests/test_markdown.py
index 460ddef207..2dedc5df6b 100644
--- a/app/tests/core_tests/test_markdown.py
+++ b/app/tests/core_tests/test_markdown.py
@@ -1,61 +1,64 @@
 import textwrap
 
 import pytest
-from django.conf import settings
-from markdown import markdown
+from django.utils.safestring import SafeString, mark_safe
 
 from grandchallenge.core.templatetags.bleach import md2html
+from grandchallenge.core.utils.markdown import ExtendHTMLTagClasses
 
 
 @pytest.mark.parametrize(
     "markdown_with_html, expected_output",
     (
-        # (
-        #     textwrap.dedent(
-        #         """
-        #         ![](test.png)
-        #         > Quote Me
-        #         Markdown | Less | Pretty
-        #         --- | --- | ---
-        #         *Still* | `renders` | **nicely**
-        #         1 | 2 | 3
-        #         ```python
-        #         def test_function():
-        #             pass
-        #         ```"""
-        #     ),
-        #     textwrap.dedent(
-        #         """\
-        #         <p><img class="img-fluid" src="test.png"/></p>
-        #         <blockquote class="blockquote">
-        #         <p>Quote Me</p>
-        #         </blockquote>
-        #         <table class="table table-hover table-borderless">
-        #         <thead class="thead-light">
-        #         <tr>
-        #         <th>Markdown</th>
-        #         <th>Less</th>
-        #         <th>Pretty</th>
-        #         </tr>
-        #         </thead>
-        #         <tbody>
-        #         <tr>
-        #         <td><em>Still</em></td>
-        #         <td><code class="codehilite">renders</code></td>
-        #         <td><strong>nicely</strong></td>
-        #         </tr>
-        #         <tr>
-        #         <td>1</td>
-        #         <td>2</td>
-        #         <td>3</td>
-        #         </tr>
-        #         </tbody>
-        #         </table>
-        #         <div class="codehilite"><pre><span></span><span class="k">def</span> <span class="nf">test_function</span><span class="p">():</span>
-        #             <span class="k">pass</span>
-        #         </pre></div>"""
-        #     ),
-        # ),
+        (
+            textwrap.dedent(
+                """
+                ![](test.png)
+
+                > Quote Me
+
+                Markdown | Less | Pretty
+                --- | --- | ---
+                *Still* | `renders` | **nicely**
+                1 | 2 | 3
+
+                ```python
+                def test_function():
+                    pass
+                ```"""
+            ),
+            textwrap.dedent(
+                """\
+                <p><img class="img-fluid" src="test.png"/></p>
+                <blockquote class="blockquote">
+                <p>Quote Me</p>
+                </blockquote>
+                <table class="table table-hover table-borderless">
+                <thead class="thead-light">
+                <tr>
+                <th>Markdown</th>
+                <th>Less</th>
+                <th>Pretty</th>
+                </tr>
+                </thead>
+                <tbody>
+                <tr>
+                <td><em>Still</em></td>
+                <td><code class="codehilite">renders</code></td>
+                <td><strong>nicely</strong></td>
+                </tr>
+                <tr>
+                <td>1</td>
+                <td>2</td>
+                <td>3</td>
+                </tr>
+                </tbody>
+                </table>
+                <div class="codehilite"><pre><span></span><span class="k">def</span> <span class="nf">test_function</span><span class="p">():</span>
+                    <span class="k">pass</span>
+                </pre></div>"""
+            ),
+        ),
         (
             textwrap.dedent(
                 r"""
@@ -185,6 +188,10 @@ def test_function():
                 </ul>"""
             ),
         ),
+        (
+            "&lt;script&gt;alert(&quot;foo&quot;)&lt;/script&gt;",
+            "&lt;script&gt;alert(&quot;foo&quot;)&lt;/script&gt;",
+        ),
     ),
 )
 def test_markdown_rendering(markdown_with_html, expected_output):
@@ -196,44 +203,105 @@ def test_markdown_rendering(markdown_with_html, expected_output):
     "markdown_with_html, expected_output",
     (
         (
-            """<img src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-         [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
-            """<p><img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-         <a href="https://google.com"><img alt="" class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg" /></a></p>""",
+            textwrap.dedent(
+                """\
+                <img src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
+                [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
+            ),
+            textwrap.dedent(
+                """\
+                <p><img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
+                <a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"/></a></p>"""
+            ),
         ),
         (
-            """<img class="" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-         [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
-            """<p><img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-         <a href="https://google.com"><img alt="" class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg" /></a></p>""",
+            textwrap.dedent(
+                """\
+                <img class="" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
+                [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
+            ),
+            textwrap.dedent(
+                """\
+                <p><img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
+                <a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"/></a></p>"""
+            ),
         ),
         (
-            """<img class="ml-2" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-         [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
-            """<p><img class="ml-2 img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-         <a href="https://google.com"><img alt="" class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg" /></a></p>""",
+            textwrap.dedent(
+                """\
+                <img class="ml-2" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
+                [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
+            ),
+            textwrap.dedent(
+                """\
+                <p><img class="ml-2 img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
+                <a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"/></a></p>"""
+            ),
         ),
         (
-            """<img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-         [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
-            """<p><img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-         <a href="https://google.com"><img alt="" class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg" /></a></p>""",
+            textwrap.dedent(
+                """\
+                <img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
+                [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
+            ),
+            textwrap.dedent(
+                """\
+                <p><img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
+                <a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"/></a></p>"""
+            ),
         ),
         (
-            """<img class="ml-2 img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-         [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
-            """<p><img class="ml-2 img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-         <a href="https://google.com"><img alt="" class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg" /></a></p>""",
+            textwrap.dedent(
+                """\
+                <img class="ml-2 img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
+                [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
+            ),
+            textwrap.dedent(
+                """\
+                <p><img class="ml-2 img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
+                <a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"/></a></p>"""
+            ),
         ),
     ),
 )
 def test_setting_class_to_html_img_within_markdown(
     markdown_with_html, expected_output
 ):
-    output = markdown(
-        text=markdown_with_html,
-        extensions=settings.MARKDOWNX_MARKDOWN_EXTENSIONS,
-        extension_configs=settings.MARKDOWNX_MARKDOWN_EXTENSION_CONFIGS,
-    )
+    output = md2html(markdown=markdown_with_html)
 
     assert output == expected_output
+
+
+@pytest.mark.parametrize(
+    "html, is_safe",
+    [
+        (
+            mark_safe("<div>Content</div>"),
+            True,
+        ),
+        (
+            "<div>Content</div>",
+            False,
+        ),
+    ],
+)
+def test_extend_html_tag_classes_insecure_markup(html, is_safe):
+    tag_classes = {"div": ["new-class"]}
+
+    # Instantiate the class
+    extender = ExtendHTMLTagClasses(tag_classes)
+
+    # Process the HTML
+    result = extender(html)
+
+    # Check if the output matches the expected safety status
+    assert isinstance(result, SafeString) == is_safe
+
+
+def test_extend_html_tag_classes_insecure_classes():
+    extender = ExtendHTMLTagClasses({"div": ['<script>alert("foo")</script>']})
+    output = extender("<div>Content</div>")
+    assert (
+        output
+        == '<div class="&lt;script&gt;alert(&quot;foo&quot;)&lt;/script&gt;">Content</div>'
+    )

From 237838495a78120be3a64f1bc56026c8f748fa17 Mon Sep 17 00:00:00 2001
From: Chris van Run <chris.vanrun@radboudumc.nl>
Date: Fri, 20 Dec 2024 16:24:20 +0100
Subject: [PATCH 05/12] Refactor test

---
 app/tests/core_tests/test_markdown.py | 137 +++++++++-----------------
 1 file changed, 45 insertions(+), 92 deletions(-)

diff --git a/app/tests/core_tests/test_markdown.py b/app/tests/core_tests/test_markdown.py
index 2dedc5df6b..75e887f1c8 100644
--- a/app/tests/core_tests/test_markdown.py
+++ b/app/tests/core_tests/test_markdown.py
@@ -190,118 +190,71 @@ def test_function():
         ),
         (
             "&lt;script&gt;alert(&quot;foo&quot;)&lt;/script&gt;",
-            "&lt;script&gt;alert(&quot;foo&quot;)&lt;/script&gt;",
-        ),
-    ),
-)
-def test_markdown_rendering(markdown_with_html, expected_output):
-    output = md2html(markdown=markdown_with_html)
-    assert output == expected_output
-
-
-@pytest.mark.parametrize(
-    "markdown_with_html, expected_output",
-    (
-        (
-            textwrap.dedent(
-                """\
-                <img src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-                [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
-            ),
-            textwrap.dedent(
-                """\
-                <p><img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-                <a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"/></a></p>"""
-            ),
-        ),
-        (
-            textwrap.dedent(
-                """\
-                <img class="" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-                [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
-            ),
-            textwrap.dedent(
-                """\
-                <p><img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-                <a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"/></a></p>"""
-            ),
-        ),
-        (
-            textwrap.dedent(
-                """\
-                <img class="ml-2" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-                [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
-            ),
-            textwrap.dedent(
-                """\
-                <p><img class="ml-2 img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-                <a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"/></a></p>"""
-            ),
+            "<p>&lt;script&gt;alert(&quot;foo&quot;)&lt;/script&gt;</p>",
         ),
         (
-            textwrap.dedent(
-                """\
-                <img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-                [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
-            ),
-            textwrap.dedent(
-                """\
-                <p><img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-                <a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"/></a></p>"""
-            ),
-        ),
-        (
-            textwrap.dedent(
-                """\
-                <img class="ml-2 img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-                [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
-            ),
-            textwrap.dedent(
-                """\
-                <p><img class="ml-2 img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
-                <a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"/></a></p>"""
-            ),
+            "[![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)",
+            '<p><a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"/></a></p>',
         ),
     ),
 )
-def test_setting_class_to_html_img_within_markdown(
-    markdown_with_html, expected_output
-):
+def test_markdown_rendering(markdown_with_html, expected_output):
     output = md2html(markdown=markdown_with_html)
-
     assert output == expected_output
 
 
 @pytest.mark.parametrize(
-    "html, is_safe",
+    "html, tag_classes, expected_output, is_safe",
     [
-        (
+        (  # Safe input
             mark_safe("<div>Content</div>"),
+            {},
+            "<div>Content</div>",
             True,
         ),
-        (
+        (  # Unsafe input
+            "<div>Content</div>",
+            {},
             "<div>Content</div>",
             False,
         ),
+        (  # Escaped classes
+            mark_safe("<div>Content</div>"),
+            {"div": ['<script>alert("foo")</script>']},
+            '<div class="&lt;script&gt;alert(&quot;foo&quot;)&lt;/script&gt;">Content</div>',
+            True,
+        ),
+        (  # Empty class
+            '<div class="">Content</div>',
+            {"div": ["foo"]},
+            '<div class="foo">Content</div>',
+            False,
+        ),
+        (  # Existing class
+            '<div class="ml-2">Content</div>',
+            {"div": ["foo"]},
+            '<div class="ml-2 foo">Content</div>',
+            False,
+        ),
+        (  # Extension class already present
+            '<div class="foo">Content</div>',
+            {"div": ["foo"]},
+            '<div class="foo">Content</div>',
+            False,
+        ),
+        (  # Existing class + extension class
+            '<div class="ml-2 foo">Content</div>',
+            {"div": ["foo"]},
+            '<div class="ml-2 foo">Content</div>',
+            False,
+        ),
     ],
 )
-def test_extend_html_tag_classes_insecure_markup(html, is_safe):
-    tag_classes = {"div": ["new-class"]}
-
-    # Instantiate the class
+def test_extend_html_tag_classes(html, tag_classes, expected_output, is_safe):
     extender = ExtendHTMLTagClasses(tag_classes)
+    output = extender(html)
 
-    # Process the HTML
-    result = extender(html)
+    assert output == expected_output
 
     # Check if the output matches the expected safety status
-    assert isinstance(result, SafeString) == is_safe
-
-
-def test_extend_html_tag_classes_insecure_classes():
-    extender = ExtendHTMLTagClasses({"div": ['<script>alert("foo")</script>']})
-    output = extender("<div>Content</div>")
-    assert (
-        output
-        == '<div class="&lt;script&gt;alert(&quot;foo&quot;)&lt;/script&gt;">Content</div>'
-    )
+    assert isinstance(output, SafeString) == is_safe

From 5210a63e0007861957cddf1d8c546d3bd70018e9 Mon Sep 17 00:00:00 2001
From: Chris van Run <chris.vanrun@radboudumc.nl>
Date: Fri, 20 Dec 2024 16:24:30 +0100
Subject: [PATCH 06/12] Handle empty tags

---
 app/grandchallenge/core/utils/markdown.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/app/grandchallenge/core/utils/markdown.py b/app/grandchallenge/core/utils/markdown.py
index 4427529ef1..7f820de2ea 100644
--- a/app/grandchallenge/core/utils/markdown.py
+++ b/app/grandchallenge/core/utils/markdown.py
@@ -29,7 +29,7 @@ def decode(self, **kwargs):
 class ExtendHTMLTagClasses:
     def __init__(self, tag_classes):
         # Make extensions safe
-        self.tag_class_dict = {
+        self.clean_tag_classes = {
             t: [escape(c).strip() for c in classes]
             for t, classes in tag_classes.items()
         }
@@ -39,12 +39,14 @@ def __call__(self, html):
 
         soup = BeautifulSoupWithCharEntities(markup=html)
 
-        for element in soup.find_all(self.tag_class_dict.keys()):
-            classes = element.get("class", [])
-            for new_class in self.tag_class_dict[element.name]:
-                if new_class not in classes:
-                    classes.append(new_class)
-            element["class"] = classes
+        tags = [*self.clean_tag_classes.keys()]
+        if tags:
+            for element in soup.find_all(tags):
+                classes = element.get("class", [])
+                for new_class in self.clean_tag_classes[element.name]:
+                    if new_class not in classes:
+                        classes.append(new_class)
+                element["class"] = classes
 
         new_markup = soup.decode()
 

From edae00c7aa6d405bdabb25ddb326c843bf84e4cd Mon Sep 17 00:00:00 2001
From: Chris van Run <chris.vanrun@radboudumc.nl>
Date: Fri, 20 Dec 2024 16:46:46 +0100
Subject: [PATCH 07/12] Fix youtube tag subsitution test

---
 .../core_tests/test_tag_substitutions.py      | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/app/tests/core_tests/test_tag_substitutions.py b/app/tests/core_tests/test_tag_substitutions.py
index 9d76d73287..3ae6208ee9 100644
--- a/app/tests/core_tests/test_tag_substitutions.py
+++ b/app/tests/core_tests/test_tag_substitutions.py
@@ -1,3 +1,5 @@
+import textwrap
+
 import pytest
 from django.utils.safestring import SafeString, mark_safe
 
@@ -131,16 +133,13 @@ def test_no_change_with_no_tag_or_arg_match(content):
     assert s == content
 
 
-EXPECTED_YOUTUBE_EMBED = """<p><div class="embed-responsive embed-responsive-16by9 rounded border-0">
-    <iframe
-            src="https://www.youtube-nocookie.com/embed/QCYYhkTlnhQ?disablekb=1&amp;rel=0"
-            allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture; web-share; fullscreen"
-            class="embed-responsive-item"
-            loading="lazy"
-            sandbox="allow-scripts allow-same-origin allow-presentation allow-popups"
-    ></iframe>
-</div>
-</p>"""
+EXPECTED_YOUTUBE_EMBED = textwrap.dedent(
+    """\
+    <p><div class="embed-responsive embed-responsive-16by9 rounded border-0">
+    <iframe allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture; web-share; fullscreen" class="embed-responsive-item" loading="lazy" sandbox="allow-scripts allow-same-origin allow-presentation allow-popups" src="https://www.youtube-nocookie.com/embed/QCYYhkTlnhQ?disablekb=1&amp;rel=0"></iframe>
+    </div>
+    </p>"""
+)
 
 
 @pytest.mark.parametrize(

From f7286a9703ab3c8106003c33271d13da1fb2720d Mon Sep 17 00:00:00 2001
From: James Meakin <12661555+jmsmkn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 11:27:43 +0100
Subject: [PATCH 08/12] Revert to using markdown processor

---
 app/config/settings.py                        | 19 +----
 .../core/templatetags/bleach.py               |  2 +-
 app/grandchallenge/core/utils/markdown.py     | 65 +++++-----------
 app/tests/core_tests/test_markdown.py         | 75 +++++++------------
 4 files changed, 52 insertions(+), 109 deletions(-)

diff --git a/app/config/settings.py b/app/config/settings.py
index 7303c51c38..7310eeba90 100644
--- a/app/config/settings.py
+++ b/app/config/settings.py
@@ -22,7 +22,7 @@
 from config.denylist import USERNAME_DENYLIST
 from grandchallenge.components.exceptions import PriorStepFailed
 from grandchallenge.core.utils import strtobool
-from grandchallenge.core.utils.markdown import ExtendHTMLTagClasses
+from grandchallenge.core.utils.markdown import BS4Extension
 
 MEGABYTE = 1024 * 1024
 GIGABYTE = 1024 * MEGABYTE
@@ -789,22 +789,9 @@ def get_private_ip():
     "pymdownx.magiclink",
     "pymdownx.tasklist",
     "pymdownx.tilde",
+    BS4Extension(),
 ]
-MARKDOWN_POST_PROCESSORS = [
-    ExtendHTMLTagClasses(
-        {
-            "img": ["img-fluid"],
-            "blockquote": ["blockquote"],
-            "table": [
-                "table",
-                "table-hover",
-                "table-borderless",
-            ],
-            "thead": ["thead-light"],
-            "code": ["codehilite"],
-        }
-    ),
-]
+MARKDOWN_POST_PROCESSORS = []
 MARKDOWNX_MARKDOWNIFY_FUNCTION = (
     "grandchallenge.core.templatetags.bleach.md2html"
 )
diff --git a/app/grandchallenge/core/templatetags/bleach.py b/app/grandchallenge/core/templatetags/bleach.py
index 2bf85bfc4e..a0c9386e14 100644
--- a/app/grandchallenge/core/templatetags/bleach.py
+++ b/app/grandchallenge/core/templatetags/bleach.py
@@ -87,7 +87,7 @@ def md2html(
     post_processors = [*settings.MARKDOWN_POST_PROCESSORS]
 
     if process_youtube_tags:
-        post_processors = [YOUTUBE_TAG_SUBSTITUTION, *post_processors]
+        post_processors.append(YOUTUBE_TAG_SUBSTITUTION)
 
     for processor in post_processors:
         cleaned_html = processor(cleaned_html)
diff --git a/app/grandchallenge/core/utils/markdown.py b/app/grandchallenge/core/utils/markdown.py
index 7f820de2ea..28df5d6c11 100644
--- a/app/grandchallenge/core/utils/markdown.py
+++ b/app/grandchallenge/core/utils/markdown.py
@@ -1,59 +1,34 @@
 from bs4 import BeautifulSoup
-from django.utils.html import escape
-from django.utils.safestring import SafeString, mark_safe
 from markdown import Extension
+from markdown.postprocessors import Postprocessor
 from markdown.treeprocessors import Treeprocessor
 
 
-class BeautifulSoupWithCharEntities(BeautifulSoup):
-    """
-    Soup generator that elegantly handles reserved HTML entity placeholders.
-
-    For instance, the soup HTMLparser replaces these (e.g. '&lt;') into their
-    unicode equivalents (e.g. '<').
-
-    This messes up things if the HTML is decoded into a string again.
-    """
-
-    def __init__(self, /, markup, features="html.parser", **kwargs):
-        markup = markup.replace("&", "&amp;")
-
-        super().__init__(markup=markup, features=features, **kwargs)
+class BS4Extension(Extension):
+    def extendMarkdown(self, md):  # noqa: N802
+        md.postprocessors.register(BS4Postprocessor(md), "bs4_extension", 0)
 
-    def decode(self, **kwargs):
-        # Prevent entity subsitution (e.g. "&" -> "&amp")
-        kwargs["formatter"] = None
-        return super().decode(**kwargs)
 
+class BS4Postprocessor(Postprocessor):
+    def run(self, text):
+        soup = BeautifulSoup(text, "html.parser")
 
-class ExtendHTMLTagClasses:
-    def __init__(self, tag_classes):
-        # Make extensions safe
-        self.clean_tag_classes = {
-            t: [escape(c).strip() for c in classes]
-            for t, classes in tag_classes.items()
+        class_map = {
+            "img": ["img-fluid"],
+            "blockquote": ["blockquote"],
+            "table": ["table", "table-hover", "table-borderless"],
+            "thead": ["thead-light"],
+            "code": ["codehilite"],
         }
 
-    def __call__(self, html):
-        input_is_safe = isinstance(html, SafeString)
-
-        soup = BeautifulSoupWithCharEntities(markup=html)
-
-        tags = [*self.clean_tag_classes.keys()]
-        if tags:
-            for element in soup.find_all(tags):
-                classes = element.get("class", [])
-                for new_class in self.clean_tag_classes[element.name]:
-                    if new_class not in classes:
-                        classes.append(new_class)
-                element["class"] = classes
-
-        new_markup = soup.decode()
-
-        if input_is_safe:
-            new_markup = mark_safe(new_markup)
+        for element in soup.find_all([*class_map.keys()]):
+            classes = element.get("class", [])
+            for new_class in class_map[element.name]:
+                if new_class not in classes:
+                    classes.append(new_class)
+            element["class"] = classes
 
-        return new_markup
+        return str(soup)
 
 
 class LinkBlankTargetExtension(Extension):
diff --git a/app/tests/core_tests/test_markdown.py b/app/tests/core_tests/test_markdown.py
index 91025fbde1..40e3bbc238 100644
--- a/app/tests/core_tests/test_markdown.py
+++ b/app/tests/core_tests/test_markdown.py
@@ -1,10 +1,9 @@
 import textwrap
 
 import pytest
-from django.utils.safestring import SafeString, mark_safe
+from markdown import markdown
 
 from grandchallenge.core.templatetags.bleach import md2html
-from grandchallenge.core.utils.markdown import ExtendHTMLTagClasses
 
 
 @pytest.mark.parametrize(
@@ -29,7 +28,7 @@ def test_function():
             ),
             textwrap.dedent(
                 """\
-                <p><img class="img-fluid" src="test.png"/></p>
+                <p><img class="img-fluid" src="test.png"></p>
                 <blockquote class="blockquote">
                 <p>Quote Me</p>
                 </blockquote>
@@ -122,10 +121,10 @@ def test_function():
             ),
             textwrap.dedent(
                 """\
-                <p><img class="img-fluid" src="test.png"/></p>
-                <p><img class="img-fluid" src="test-no-class.png"/></p>
-                <p><img class="img-fluid" src="test-empty-class.png"/></p>
-                <p><img class="ml-3 img-fluid" src="test-existing-class.png"/></p>
+                <p><img class="img-fluid" src="test.png"></p>
+                <p><img class="img-fluid" src="test-no-class.png"></p>
+                <p><img class="img-fluid" src="test-empty-class.png"></p>
+                <p><img class="ml-3 img-fluid" src="test-existing-class.png"></p>
                 <blockquote class="blockquote">
                 <p>Quote Me</p>
                 </blockquote>
@@ -184,17 +183,17 @@ def test_function():
                 text<sub>a subscript</sub></p>
                 <ul>
                 <li>Just paste links directly in the document like this: <a href="https://google.com">https://google.com</a>.</li>
-                <li>Or even an email address: <a href="&#109;&#97;&#105;&#108;&#116;&#111;&#58;&#102;&#97;&#107;&#101;&#46;&#101;&#109;&#97;&#105;&#108;&#64;&#101;&#109;&#97;&#105;&#108;&#46;&#99;&#111;&#109;">&#102;&#97;&#107;&#101;&#46;&#101;&#109;&#97;&#105;&#108;&#64;&#101;&#109;&#97;&#105;&#108;&#46;&#99;&#111;&#109;</a>.</li>
+                <li>Or even an email address: <a href="mailto:fake.email@email.com">fake.email@email.com</a>.</li>
                 </ul>"""
             ),
         ),
         (
             "&lt;script&gt;alert(&quot;foo&quot;)&lt;/script&gt;",
-            "<p>&lt;script&gt;alert(&quot;foo&quot;)&lt;/script&gt;</p>",
+            '<p>&lt;script&gt;alert("foo")&lt;/script&gt;</p>',
         ),
         (
             "[![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)",
-            '<p><a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"/></a></p>',
+            '<p><a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"></a></p>',
         ),
     ),
 )
@@ -204,57 +203,39 @@ def test_markdown_rendering(markdown_with_html, expected_output):
 
 
 @pytest.mark.parametrize(
-    "html, tag_classes, expected_output, is_safe",
+    "html, expected_output",
     [
-        (  # Safe input
-            mark_safe("<div>Content</div>"),
-            {},
+        (  # Unaffected element
             "<div>Content</div>",
-            True,
-        ),
-        (  # Unsafe input
-            "<div>Content</div>",
-            {},
             "<div>Content</div>",
-            False,
         ),
-        (  # Escaped classes
-            mark_safe("<div>Content</div>"),
-            {"div": ['<script>alert("foo")</script>']},
-            '<div class="&lt;script&gt;alert(&quot;foo&quot;)&lt;/script&gt;">Content</div>',
-            True,
+        (  # With Markdown
+            "> Content",
+            '<blockquote class="blockquote">\n<p>Content</p>\n</blockquote>',
         ),
         (  # Empty class
-            '<div class="">Content</div>',
-            {"div": ["foo"]},
-            '<div class="foo">Content</div>',
-            False,
+            '<blockquote class="">Content</blockquote>',
+            '<blockquote class="blockquote">Content</blockquote>',
         ),
         (  # Existing class
-            '<div class="ml-2">Content</div>',
-            {"div": ["foo"]},
-            '<div class="ml-2 foo">Content</div>',
-            False,
+            '<blockquote class="ml-2">Content</blockquote>',
+            '<blockquote class="ml-2 blockquote">Content</blockquote>',
         ),
         (  # Extension class already present
-            '<div class="foo">Content</div>',
-            {"div": ["foo"]},
-            '<div class="foo">Content</div>',
-            False,
+            '<blockquote class="blockquote">Content</blockquote>',
+            '<blockquote class="blockquote">Content</blockquote>',
         ),
         (  # Existing class + extension class
-            '<div class="ml-2 foo">Content</div>',
-            {"div": ["foo"]},
-            '<div class="ml-2 foo">Content</div>',
-            False,
+            '<blockquote class="ml-2 blockquote">Content</blockquote>',
+            '<blockquote class="ml-2 blockquote">Content</blockquote>',
         ),
     ],
 )
-def test_extend_html_tag_classes(html, tag_classes, expected_output, is_safe):
-    extender = ExtendHTMLTagClasses(tag_classes)
-    output = extender(html)
+def test_extend_html_tag_classes(html, expected_output, settings):
+    output = markdown(
+        text=html,
+        extensions=settings.MARKDOWNX_MARKDOWN_EXTENSIONS,
+        extension_configs=settings.MARKDOWNX_MARKDOWN_EXTENSION_CONFIGS,
+    )
 
     assert output == expected_output
-
-    # Check if the output matches the expected safety status
-    assert isinstance(output, SafeString) == is_safe

From a08d295c1a37123053204321060b213b51856a49 Mon Sep 17 00:00:00 2001
From: James Meakin <12661555+jmsmkn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 11:35:36 +0100
Subject: [PATCH 09/12] Revert expected text

---
 .../core_tests/test_tag_substitutions.py      | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/app/tests/core_tests/test_tag_substitutions.py b/app/tests/core_tests/test_tag_substitutions.py
index 3ae6208ee9..9d76d73287 100644
--- a/app/tests/core_tests/test_tag_substitutions.py
+++ b/app/tests/core_tests/test_tag_substitutions.py
@@ -1,5 +1,3 @@
-import textwrap
-
 import pytest
 from django.utils.safestring import SafeString, mark_safe
 
@@ -133,13 +131,16 @@ def test_no_change_with_no_tag_or_arg_match(content):
     assert s == content
 
 
-EXPECTED_YOUTUBE_EMBED = textwrap.dedent(
-    """\
-    <p><div class="embed-responsive embed-responsive-16by9 rounded border-0">
-    <iframe allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture; web-share; fullscreen" class="embed-responsive-item" loading="lazy" sandbox="allow-scripts allow-same-origin allow-presentation allow-popups" src="https://www.youtube-nocookie.com/embed/QCYYhkTlnhQ?disablekb=1&amp;rel=0"></iframe>
-    </div>
-    </p>"""
-)
+EXPECTED_YOUTUBE_EMBED = """<p><div class="embed-responsive embed-responsive-16by9 rounded border-0">
+    <iframe
+            src="https://www.youtube-nocookie.com/embed/QCYYhkTlnhQ?disablekb=1&amp;rel=0"
+            allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture; web-share; fullscreen"
+            class="embed-responsive-item"
+            loading="lazy"
+            sandbox="allow-scripts allow-same-origin allow-presentation allow-popups"
+    ></iframe>
+</div>
+</p>"""
 
 
 @pytest.mark.parametrize(

From 910c14594be5dc25fb345658b4bc20bb2fc2a08b Mon Sep 17 00:00:00 2001
From: James Meakin <12661555+jmsmkn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 11:41:55 +0100
Subject: [PATCH 10/12] Add a test for mixed content

---
 app/tests/core_tests/test_markdown.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/app/tests/core_tests/test_markdown.py b/app/tests/core_tests/test_markdown.py
index 40e3bbc238..c04275ba09 100644
--- a/app/tests/core_tests/test_markdown.py
+++ b/app/tests/core_tests/test_markdown.py
@@ -213,6 +213,11 @@ def test_markdown_rendering(markdown_with_html, expected_output):
             "> Content",
             '<blockquote class="blockquote">\n<p>Content</p>\n</blockquote>',
         ),
+        (  # Mixed content
+            "> Markdown Content\n"
+            "<blockquote class=>HTML Content</blockquote>",
+            '<blockquote class="blockquote">\n<p>Markdown Content</p>\n</blockquote>\n<blockquote class="blockquote">HTML Content</blockquote>',
+        ),
         (  # Empty class
             '<blockquote class="">Content</blockquote>',
             '<blockquote class="blockquote">Content</blockquote>',

From 1f6a64b791b50d6a2192a70121ef36d34bf3b6a6 Mon Sep 17 00:00:00 2001
From: James Meakin <12661555+jmsmkn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 11:52:34 +0100
Subject: [PATCH 11/12] Add spacing to code blocks

---
 app/grandchallenge/core/static/css/base.scss | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/app/grandchallenge/core/static/css/base.scss b/app/grandchallenge/core/static/css/base.scss
index f9a8e6bd46..c8fe073f46 100644
--- a/app/grandchallenge/core/static/css/base.scss
+++ b/app/grandchallenge/core/static/css/base.scss
@@ -50,3 +50,7 @@ blockquote {
     border-left: $spacer * .25 solid $primary;
     color: $primary;
 }
+
+div.codehilite {
+    margin-bottom: $paragraph-margin-bottom;
+}

From 3ea67df0da46755d3f405ef2c24e366e7a3bb18f Mon Sep 17 00:00:00 2001
From: James Meakin <12661555+jmsmkn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 12:53:22 +0100
Subject: [PATCH 12/12] Fix test

---
 app/tests/pages_tests/test_pages.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/tests/pages_tests/test_pages.py b/app/tests/pages_tests/test_pages.py
index 7b390ae788..09931fdae7 100644
--- a/app/tests/pages_tests/test_pages.py
+++ b/app/tests/pages_tests/test_pages.py
@@ -107,8 +107,8 @@ def test_page_create(client, two_challenge_sets):
     response = get_view_for_user(url=response.url, client=client)
     assert response.status_code == 200
     assert (
-        '<h1 id="hello-world">HELLO WORLD<a class="headerlink text-muted small pl-1" href="#hello-world" title="Permanent link">&para;</a></h1>'
-        in str(response.content)
+        '<h1 id="hello-world">HELLO WORLD<a class="headerlink text-muted small pl-1" href="#hello-world" title="Permanent link">¶</a></h1>'
+        in response.content.decode("utf-8")
     )
     # Check that it was created in the correct challenge
     response = get_view_for_user(