From 2cf765ea583af515802da2dc6127ed40c0703741 Mon Sep 17 00:00:00 2001
From: Chris van Run
Date: Wed, 18 Dec 2024 16:54:38 +0100
Subject: [PATCH 01/12] Ensure optional postprocessors are ran before the
default postprocessors
---
app/config/settings.py | 26 ++++++++++++++++---
.../core/templatetags/bleach.py | 2 +-
2 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/app/config/settings.py b/app/config/settings.py
index 8e06ba64a0..e6621d52e4 100644
--- a/app/config/settings.py
+++ b/app/config/settings.py
@@ -22,7 +22,7 @@
from config.denylist import USERNAME_DENYLIST
from grandchallenge.components.exceptions import PriorStepFailed
from grandchallenge.core.utils import strtobool
-from grandchallenge.core.utils.markdown import BS4Extension
+from grandchallenge.core.utils.markdown import ExtendTagClasses
MEGABYTE = 1024 * 1024
GIGABYTE = 1024 * MEGABYTE
@@ -789,13 +789,31 @@ def get_private_ip():
"pymdownx.magiclink",
"pymdownx.tasklist",
"pymdownx.tilde",
- BS4Extension(),
]
-MARKDOWN_POST_PROCESSORS = []
+
+MARKDOWN_POST_PROCESSORS = [
+ ExtendTagClasses(
+ {
+ "img": ["img-fluid"],
+ "blockquote": ["blockquote"],
+ "table": [
+ "table",
+ "table-hover",
+ "table-borderless",
+ ],
+ "thead": ["thead-light"],
+ "code": ["codehilite"],
+ }
+ )
+]
MARKDOWNX_MARKDOWNIFY_FUNCTION = (
"grandchallenge.core.templatetags.bleach.md2html"
)
-MARKDOWNX_MARKDOWN_EXTENSION_CONFIGS = {}
+MARKDOWNX_MARKDOWN_EXTENSION_CONFIGS = {
+ "markdown.extensions.codehilite": {
+ "wrapcode": False,
+ }
+}
MARKDOWNX_IMAGE_MAX_SIZE = {"size": (2000, 0), "quality": 90}
MARKDOWNX_EDITOR_RESIZABLE = "False"
diff --git a/app/grandchallenge/core/templatetags/bleach.py b/app/grandchallenge/core/templatetags/bleach.py
index a0c9386e14..2bf85bfc4e 100644
--- a/app/grandchallenge/core/templatetags/bleach.py
+++ b/app/grandchallenge/core/templatetags/bleach.py
@@ -87,7 +87,7 @@ def md2html(
post_processors = [*settings.MARKDOWN_POST_PROCESSORS]
if process_youtube_tags:
- post_processors.append(YOUTUBE_TAG_SUBSTITUTION)
+ post_processors = [YOUTUBE_TAG_SUBSTITUTION, *post_processors]
for processor in post_processors:
cleaned_html = processor(cleaned_html)
From b51031250e947595748ab23be6f8e9a2feacb40f Mon Sep 17 00:00:00 2001
From: Chris van Run
Date: Wed, 18 Dec 2024 16:55:13 +0100
Subject: [PATCH 02/12] Add new ExtendTagClasses post processor
---
app/grandchallenge/core/utils/markdown.py | 67 +++++++----------------
1 file changed, 20 insertions(+), 47 deletions(-)
diff --git a/app/grandchallenge/core/utils/markdown.py b/app/grandchallenge/core/utils/markdown.py
index 98b9d94176..c963831486 100644
--- a/app/grandchallenge/core/utils/markdown.py
+++ b/app/grandchallenge/core/utils/markdown.py
@@ -1,61 +1,34 @@
-from xml.etree import ElementTree
-
-from bs4 import BeautifulSoup, Tag
+from bs4 import BeautifulSoup
+from django.utils.html import escape
+from django.utils.safestring import SafeString, mark_safe
from markdown import Extension
from markdown.treeprocessors import Treeprocessor
-class BS4Extension(Extension):
- def extendMarkdown(self, md): # noqa: N802
- md.registerExtension(self)
- md.treeprocessors.register(BS4Treeprocessor(md), "bs4_extension", 0)
-
-
-class BS4Treeprocessor(Treeprocessor):
- def run(self, root):
- el_class_dict = {
- "img": "img-fluid",
- "blockquote": "blockquote",
- "table": "table table-hover table-borderless",
- "thead": "thead-light",
- "code": "codehilite",
- }
-
- for el in root.iter():
- if el.tag in el_class_dict:
- self.set_css_class(
- element=el, class_name=el_class_dict[el.tag]
- )
-
- for i, html_block in enumerate(self.md.htmlStash.rawHtmlBlocks):
- bs4block = BeautifulSoup(html_block, "html.parser")
+class ExtendTagClasses:
+ def __init__(self, tag_classes):
+ self.tag_class_dict = tag_classes
- for tag, tag_class in el_class_dict.items():
- for el in bs4block.find_all(tag):
- self.set_css_class(element=el, class_name=tag_class)
- self.md.htmlStash.rawHtmlBlocks[i] = str(bs4block)
+ def __call__(self, html):
+ input_is_safe = isinstance(html, SafeString)
- @staticmethod
- def set_css_class(*, element, class_name):
- if isinstance(element, ElementTree.Element):
- current_class = element.attrib.get("class", "")
+ soup = BeautifulSoup(html, "html.parser")
+ for tag, classes in self.tag_class_dict.items():
- if class_name not in current_class:
- new_class = f"{current_class} {class_name}".strip()
- element.set("class", new_class)
+ # Make extensions safe
+ classes = [escape(c).strip() for c in classes]
- elif isinstance(element, Tag):
- if "class" not in element.attrs:
- element.attrs["class"] = []
+ # Add extension to the class attribute
+ for element in soup.find_all(tag):
+ current_classes = element.get("class", [])
+ element["class"] = [*current_classes, *classes]
- current_class = element["class"]
+ new_html = str(soup)
- for name in class_name.split(" "):
- if class_name not in current_class:
- current_class.append(name)
+ if input_is_safe:
+ new_html = mark_safe(new_html)
- else:
- raise TypeError("Unsupported element")
+ return mark_safe(new_html)
class LinkBlankTargetExtension(Extension):
From 72d340203f4118cc2da3651be42e73ab1506a2e2 Mon Sep 17 00:00:00 2001
From: Chris van Run
Date: Wed, 18 Dec 2024 16:55:23 +0100
Subject: [PATCH 03/12] First fixes to markdown tests
---
app/tests/core_tests/test_markdown.py | 120 +++++++++++---------------
1 file changed, 52 insertions(+), 68 deletions(-)
diff --git a/app/tests/core_tests/test_markdown.py b/app/tests/core_tests/test_markdown.py
index 3dcf485759..460ddef207 100644
--- a/app/tests/core_tests/test_markdown.py
+++ b/app/tests/core_tests/test_markdown.py
@@ -5,61 +5,57 @@
from markdown import markdown
from grandchallenge.core.templatetags.bleach import md2html
-from grandchallenge.core.utils.markdown import BS4Treeprocessor
@pytest.mark.parametrize(
"markdown_with_html, expected_output",
(
- (
- textwrap.dedent(
- """
- ![](test.png)
-
- > Quote Me
-
- Markdown | Less | Pretty
- --- | --- | ---
- *Still* | `renders` | **nicely**
- 1 | 2 | 3
-
- ```python
- def test_function():
- pass
- ```"""
- ),
- textwrap.dedent(
- """\
-
-
- Quote Me
-
-
-
-
- Markdown |
- Less |
- Pretty |
-
-
-
-
- Still |
- renders |
- nicely |
-
-
- 1 |
- 2 |
- 3 |
-
-
-
- def test_function():
- pass
-
"""
- ),
- ),
+ # (
+ # textwrap.dedent(
+ # """
+ # ![](test.png)
+ # > Quote Me
+ # Markdown | Less | Pretty
+ # --- | --- | ---
+ # *Still* | `renders` | **nicely**
+ # 1 | 2 | 3
+ # ```python
+ # def test_function():
+ # pass
+ # ```"""
+ # ),
+ # textwrap.dedent(
+ # """\
+ #
+ #
+ # Quote Me
+ #
+ #
+ #
+ #
+ # Markdown |
+ # Less |
+ # Pretty |
+ #
+ #
+ #
+ #
+ # Still |
+ # renders |
+ # nicely |
+ #
+ #
+ # 1 |
+ # 2 |
+ # 3 |
+ #
+ #
+ #
+ # def test_function():
+ # pass
+ #
"""
+ # ),
+ # ),
(
textwrap.dedent(
r"""
@@ -123,10 +119,10 @@ def test_function():
),
textwrap.dedent(
"""\
-
-
-
-
+
+
+
+
Quote Me
@@ -136,7 +132,6 @@ def test_function():
Quote Me Existing Class
-
@@ -167,7 +162,6 @@ def test_function():
-
@@ -177,14 +171,11 @@ def test_function():
-
- def test_function():
+ def test_function():
pass
-
-
+
-
Delete me
CH3CH2OH
texta subscript
@@ -246,10 +237,3 @@ def test_setting_class_to_html_img_within_markdown(
)
assert output == expected_output
-
-
-def test_tree_processor_set_css_class_type_error():
- with pytest.raises(TypeError):
- BS4Treeprocessor.set_css_class(
- element="element", class_name="img-fluid"
- )
From 42d41dcba3a24e2f0731ea1a421798922b5626cd Mon Sep 17 00:00:00 2001
From: Chris van Run
Date: Fri, 20 Dec 2024 15:47:07 +0100
Subject: [PATCH 04/12] Add HTML postprocessor to extend classes
---
app/config/settings.py | 7 +-
app/grandchallenge/core/utils/markdown.py | 51 ++++--
app/tests/core_tests/test_markdown.py | 214 ++++++++++++++--------
3 files changed, 181 insertions(+), 91 deletions(-)
diff --git a/app/config/settings.py b/app/config/settings.py
index e6621d52e4..7303c51c38 100644
--- a/app/config/settings.py
+++ b/app/config/settings.py
@@ -22,7 +22,7 @@
from config.denylist import USERNAME_DENYLIST
from grandchallenge.components.exceptions import PriorStepFailed
from grandchallenge.core.utils import strtobool
-from grandchallenge.core.utils.markdown import ExtendTagClasses
+from grandchallenge.core.utils.markdown import ExtendHTMLTagClasses
MEGABYTE = 1024 * 1024
GIGABYTE = 1024 * MEGABYTE
@@ -790,9 +790,8 @@ def get_private_ip():
"pymdownx.tasklist",
"pymdownx.tilde",
]
-
MARKDOWN_POST_PROCESSORS = [
- ExtendTagClasses(
+ ExtendHTMLTagClasses(
{
"img": ["img-fluid"],
"blockquote": ["blockquote"],
@@ -804,7 +803,7 @@ def get_private_ip():
"thead": ["thead-light"],
"code": ["codehilite"],
}
- )
+ ),
]
MARKDOWNX_MARKDOWNIFY_FUNCTION = (
"grandchallenge.core.templatetags.bleach.md2html"
diff --git a/app/grandchallenge/core/utils/markdown.py b/app/grandchallenge/core/utils/markdown.py
index c963831486..4427529ef1 100644
--- a/app/grandchallenge/core/utils/markdown.py
+++ b/app/grandchallenge/core/utils/markdown.py
@@ -5,30 +5,53 @@
from markdown.treeprocessors import Treeprocessor
-class ExtendTagClasses:
+class BeautifulSoupWithCharEntities(BeautifulSoup):
+ """
+ Soup generator that elegantly handles reserved HTML entity placeholders.
+
+ For instance, the soup HTMLparser replaces these (e.g. '<') into their
+ unicode equivalents (e.g. '<').
+
+ This messes up things if the HTML is decoded into a string again.
+ """
+
+ def __init__(self, /, markup, features="html.parser", **kwargs):
+ markup = markup.replace("&", "&")
+
+ super().__init__(markup=markup, features=features, **kwargs)
+
+ def decode(self, **kwargs):
+ # Prevent entity subsitution (e.g. "&" -> "&")
+ kwargs["formatter"] = None
+ return super().decode(**kwargs)
+
+
+class ExtendHTMLTagClasses:
def __init__(self, tag_classes):
- self.tag_class_dict = tag_classes
+ # Make extensions safe
+ self.tag_class_dict = {
+ t: [escape(c).strip() for c in classes]
+ for t, classes in tag_classes.items()
+ }
def __call__(self, html):
input_is_safe = isinstance(html, SafeString)
- soup = BeautifulSoup(html, "html.parser")
- for tag, classes in self.tag_class_dict.items():
-
- # Make extensions safe
- classes = [escape(c).strip() for c in classes]
+ soup = BeautifulSoupWithCharEntities(markup=html)
- # Add extension to the class attribute
- for element in soup.find_all(tag):
- current_classes = element.get("class", [])
- element["class"] = [*current_classes, *classes]
+ for element in soup.find_all(self.tag_class_dict.keys()):
+ classes = element.get("class", [])
+ for new_class in self.tag_class_dict[element.name]:
+ if new_class not in classes:
+ classes.append(new_class)
+ element["class"] = classes
- new_html = str(soup)
+ new_markup = soup.decode()
if input_is_safe:
- new_html = mark_safe(new_html)
+ new_markup = mark_safe(new_markup)
- return mark_safe(new_html)
+ return new_markup
class LinkBlankTargetExtension(Extension):
diff --git a/app/tests/core_tests/test_markdown.py b/app/tests/core_tests/test_markdown.py
index 460ddef207..2dedc5df6b 100644
--- a/app/tests/core_tests/test_markdown.py
+++ b/app/tests/core_tests/test_markdown.py
@@ -1,61 +1,64 @@
import textwrap
import pytest
-from django.conf import settings
-from markdown import markdown
+from django.utils.safestring import SafeString, mark_safe
from grandchallenge.core.templatetags.bleach import md2html
+from grandchallenge.core.utils.markdown import ExtendHTMLTagClasses
@pytest.mark.parametrize(
"markdown_with_html, expected_output",
(
- # (
- # textwrap.dedent(
- # """
- # ![](test.png)
- # > Quote Me
- # Markdown | Less | Pretty
- # --- | --- | ---
- # *Still* | `renders` | **nicely**
- # 1 | 2 | 3
- # ```python
- # def test_function():
- # pass
- # ```"""
- # ),
- # textwrap.dedent(
- # """\
- #
- #
- # Quote Me
- #
- #
- #
- #
- # Markdown |
- # Less |
- # Pretty |
- #
- #
- #
- #
- # Still |
- # renders |
- # nicely |
- #
- #
- # 1 |
- # 2 |
- # 3 |
- #
- #
- #
- # def test_function():
- # pass
- #
"""
- # ),
- # ),
+ (
+ textwrap.dedent(
+ """
+ ![](test.png)
+
+ > Quote Me
+
+ Markdown | Less | Pretty
+ --- | --- | ---
+ *Still* | `renders` | **nicely**
+ 1 | 2 | 3
+
+ ```python
+ def test_function():
+ pass
+ ```"""
+ ),
+ textwrap.dedent(
+ """\
+
+
+ Quote Me
+
+
+
+
+ Markdown |
+ Less |
+ Pretty |
+
+
+
+
+ Still |
+ renders |
+ nicely |
+
+
+ 1 |
+ 2 |
+ 3 |
+
+
+
+ def test_function():
+ pass
+
"""
+ ),
+ ),
(
textwrap.dedent(
r"""
@@ -185,6 +188,10 @@ def test_function():
"""
),
),
+ (
+ "<script>alert("foo")</script>",
+ "<script>alert("foo")</script>",
+ ),
),
)
def test_markdown_rendering(markdown_with_html, expected_output):
@@ -196,44 +203,105 @@ def test_markdown_rendering(markdown_with_html, expected_output):
"markdown_with_html, expected_output",
(
(
- """
- [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
- """
-
""",
+ textwrap.dedent(
+ """\
+
+ [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
+ ),
+ textwrap.dedent(
+ """\
+
+
"""
+ ),
),
(
- """
- [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
- """
-
""",
+ textwrap.dedent(
+ """\
+
+ [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
+ ),
+ textwrap.dedent(
+ """\
+
+
"""
+ ),
),
(
- """
- [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
- """
-
""",
+ textwrap.dedent(
+ """\
+
+ [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
+ ),
+ textwrap.dedent(
+ """\
+
+
"""
+ ),
),
(
- """
- [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
- """
-
""",
+ textwrap.dedent(
+ """\
+
+ [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
+ ),
+ textwrap.dedent(
+ """\
+
+
"""
+ ),
),
(
- """
- [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
- """
-
""",
+ textwrap.dedent(
+ """\
+
+ [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
+ ),
+ textwrap.dedent(
+ """\
+
+
"""
+ ),
),
),
)
def test_setting_class_to_html_img_within_markdown(
markdown_with_html, expected_output
):
- output = markdown(
- text=markdown_with_html,
- extensions=settings.MARKDOWNX_MARKDOWN_EXTENSIONS,
- extension_configs=settings.MARKDOWNX_MARKDOWN_EXTENSION_CONFIGS,
- )
+ output = md2html(markdown=markdown_with_html)
assert output == expected_output
+
+
+@pytest.mark.parametrize(
+ "html, is_safe",
+ [
+ (
+ mark_safe("Content
"),
+ True,
+ ),
+ (
+ "Content
",
+ False,
+ ),
+ ],
+)
+def test_extend_html_tag_classes_insecure_markup(html, is_safe):
+ tag_classes = {"div": ["new-class"]}
+
+ # Instantiate the class
+ extender = ExtendHTMLTagClasses(tag_classes)
+
+ # Process the HTML
+ result = extender(html)
+
+ # Check if the output matches the expected safety status
+ assert isinstance(result, SafeString) == is_safe
+
+
+def test_extend_html_tag_classes_insecure_classes():
+ extender = ExtendHTMLTagClasses({"div": ['']})
+ output = extender("Content
")
+ assert (
+ output
+ == 'Content
'
+ )
From 237838495a78120be3a64f1bc56026c8f748fa17 Mon Sep 17 00:00:00 2001
From: Chris van Run
Date: Fri, 20 Dec 2024 16:24:20 +0100
Subject: [PATCH 05/12] Refactor test
---
app/tests/core_tests/test_markdown.py | 137 +++++++++-----------------
1 file changed, 45 insertions(+), 92 deletions(-)
diff --git a/app/tests/core_tests/test_markdown.py b/app/tests/core_tests/test_markdown.py
index 2dedc5df6b..75e887f1c8 100644
--- a/app/tests/core_tests/test_markdown.py
+++ b/app/tests/core_tests/test_markdown.py
@@ -190,118 +190,71 @@ def test_function():
),
(
"<script>alert("foo")</script>",
- "<script>alert("foo")</script>",
- ),
- ),
-)
-def test_markdown_rendering(markdown_with_html, expected_output):
- output = md2html(markdown=markdown_with_html)
- assert output == expected_output
-
-
-@pytest.mark.parametrize(
- "markdown_with_html, expected_output",
- (
- (
- textwrap.dedent(
- """\
-
- [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
- ),
- textwrap.dedent(
- """\
-
-
"""
- ),
- ),
- (
- textwrap.dedent(
- """\
-
- [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
- ),
- textwrap.dedent(
- """\
-
-
"""
- ),
- ),
- (
- textwrap.dedent(
- """\
-
- [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
- ),
- textwrap.dedent(
- """\
-
-
"""
- ),
+ "<script>alert("foo")</script>
",
),
(
- textwrap.dedent(
- """\
-
- [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
- ),
- textwrap.dedent(
- """\
-
-
"""
- ),
- ),
- (
- textwrap.dedent(
- """\
-
- [![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)"""
- ),
- textwrap.dedent(
- """\
-
-
"""
- ),
+ "[![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)",
+ '
',
),
),
)
-def test_setting_class_to_html_img_within_markdown(
- markdown_with_html, expected_output
-):
+def test_markdown_rendering(markdown_with_html, expected_output):
output = md2html(markdown=markdown_with_html)
-
assert output == expected_output
@pytest.mark.parametrize(
- "html, is_safe",
+ "html, tag_classes, expected_output, is_safe",
[
- (
+ ( # Safe input
mark_safe("Content
"),
+ {},
+ "Content
",
True,
),
- (
+ ( # Unsafe input
+ "Content
",
+ {},
"Content
",
False,
),
+ ( # Escaped classes
+ mark_safe("Content
"),
+ {"div": ['']},
+ 'Content
',
+ True,
+ ),
+ ( # Empty class
+ 'Content
',
+ {"div": ["foo"]},
+ 'Content
',
+ False,
+ ),
+ ( # Existing class
+ 'Content
',
+ {"div": ["foo"]},
+ 'Content
',
+ False,
+ ),
+ ( # Extension class already present
+ 'Content
',
+ {"div": ["foo"]},
+ 'Content
',
+ False,
+ ),
+ ( # Existing class + extension class
+ 'Content
',
+ {"div": ["foo"]},
+ 'Content
',
+ False,
+ ),
],
)
-def test_extend_html_tag_classes_insecure_markup(html, is_safe):
- tag_classes = {"div": ["new-class"]}
-
- # Instantiate the class
+def test_extend_html_tag_classes(html, tag_classes, expected_output, is_safe):
extender = ExtendHTMLTagClasses(tag_classes)
+ output = extender(html)
- # Process the HTML
- result = extender(html)
+ assert output == expected_output
# Check if the output matches the expected safety status
- assert isinstance(result, SafeString) == is_safe
-
-
-def test_extend_html_tag_classes_insecure_classes():
- extender = ExtendHTMLTagClasses({"div": ['']})
- output = extender("Content
")
- assert (
- output
- == 'Content
'
- )
+ assert isinstance(output, SafeString) == is_safe
From 5210a63e0007861957cddf1d8c546d3bd70018e9 Mon Sep 17 00:00:00 2001
From: Chris van Run
Date: Fri, 20 Dec 2024 16:24:30 +0100
Subject: [PATCH 06/12] Handle empty tags
---
app/grandchallenge/core/utils/markdown.py | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/app/grandchallenge/core/utils/markdown.py b/app/grandchallenge/core/utils/markdown.py
index 4427529ef1..7f820de2ea 100644
--- a/app/grandchallenge/core/utils/markdown.py
+++ b/app/grandchallenge/core/utils/markdown.py
@@ -29,7 +29,7 @@ def decode(self, **kwargs):
class ExtendHTMLTagClasses:
def __init__(self, tag_classes):
# Make extensions safe
- self.tag_class_dict = {
+ self.clean_tag_classes = {
t: [escape(c).strip() for c in classes]
for t, classes in tag_classes.items()
}
@@ -39,12 +39,14 @@ def __call__(self, html):
soup = BeautifulSoupWithCharEntities(markup=html)
- for element in soup.find_all(self.tag_class_dict.keys()):
- classes = element.get("class", [])
- for new_class in self.tag_class_dict[element.name]:
- if new_class not in classes:
- classes.append(new_class)
- element["class"] = classes
+ tags = [*self.clean_tag_classes.keys()]
+ if tags:
+ for element in soup.find_all(tags):
+ classes = element.get("class", [])
+ for new_class in self.clean_tag_classes[element.name]:
+ if new_class not in classes:
+ classes.append(new_class)
+ element["class"] = classes
new_markup = soup.decode()
From edae00c7aa6d405bdabb25ddb326c843bf84e4cd Mon Sep 17 00:00:00 2001
From: Chris van Run
Date: Fri, 20 Dec 2024 16:46:46 +0100
Subject: [PATCH 07/12] Fix youtube tag subsitution test
---
.../core_tests/test_tag_substitutions.py | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/app/tests/core_tests/test_tag_substitutions.py b/app/tests/core_tests/test_tag_substitutions.py
index 9d76d73287..3ae6208ee9 100644
--- a/app/tests/core_tests/test_tag_substitutions.py
+++ b/app/tests/core_tests/test_tag_substitutions.py
@@ -1,3 +1,5 @@
+import textwrap
+
import pytest
from django.utils.safestring import SafeString, mark_safe
@@ -131,16 +133,13 @@ def test_no_change_with_no_tag_or_arg_match(content):
assert s == content
-EXPECTED_YOUTUBE_EMBED = """
-
-
-
"""
+EXPECTED_YOUTUBE_EMBED = textwrap.dedent(
+ """\
+
+
+
+ """
+)
@pytest.mark.parametrize(
From f7286a9703ab3c8106003c33271d13da1fb2720d Mon Sep 17 00:00:00 2001
From: James Meakin <12661555+jmsmkn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 11:27:43 +0100
Subject: [PATCH 08/12] Revert to using markdown processor
---
app/config/settings.py | 19 +----
.../core/templatetags/bleach.py | 2 +-
app/grandchallenge/core/utils/markdown.py | 65 +++++-----------
app/tests/core_tests/test_markdown.py | 75 +++++++------------
4 files changed, 52 insertions(+), 109 deletions(-)
diff --git a/app/config/settings.py b/app/config/settings.py
index 7303c51c38..7310eeba90 100644
--- a/app/config/settings.py
+++ b/app/config/settings.py
@@ -22,7 +22,7 @@
from config.denylist import USERNAME_DENYLIST
from grandchallenge.components.exceptions import PriorStepFailed
from grandchallenge.core.utils import strtobool
-from grandchallenge.core.utils.markdown import ExtendHTMLTagClasses
+from grandchallenge.core.utils.markdown import BS4Extension
MEGABYTE = 1024 * 1024
GIGABYTE = 1024 * MEGABYTE
@@ -789,22 +789,9 @@ def get_private_ip():
"pymdownx.magiclink",
"pymdownx.tasklist",
"pymdownx.tilde",
+ BS4Extension(),
]
-MARKDOWN_POST_PROCESSORS = [
- ExtendHTMLTagClasses(
- {
- "img": ["img-fluid"],
- "blockquote": ["blockquote"],
- "table": [
- "table",
- "table-hover",
- "table-borderless",
- ],
- "thead": ["thead-light"],
- "code": ["codehilite"],
- }
- ),
-]
+MARKDOWN_POST_PROCESSORS = []
MARKDOWNX_MARKDOWNIFY_FUNCTION = (
"grandchallenge.core.templatetags.bleach.md2html"
)
diff --git a/app/grandchallenge/core/templatetags/bleach.py b/app/grandchallenge/core/templatetags/bleach.py
index 2bf85bfc4e..a0c9386e14 100644
--- a/app/grandchallenge/core/templatetags/bleach.py
+++ b/app/grandchallenge/core/templatetags/bleach.py
@@ -87,7 +87,7 @@ def md2html(
post_processors = [*settings.MARKDOWN_POST_PROCESSORS]
if process_youtube_tags:
- post_processors = [YOUTUBE_TAG_SUBSTITUTION, *post_processors]
+ post_processors.append(YOUTUBE_TAG_SUBSTITUTION)
for processor in post_processors:
cleaned_html = processor(cleaned_html)
diff --git a/app/grandchallenge/core/utils/markdown.py b/app/grandchallenge/core/utils/markdown.py
index 7f820de2ea..28df5d6c11 100644
--- a/app/grandchallenge/core/utils/markdown.py
+++ b/app/grandchallenge/core/utils/markdown.py
@@ -1,59 +1,34 @@
from bs4 import BeautifulSoup
-from django.utils.html import escape
-from django.utils.safestring import SafeString, mark_safe
from markdown import Extension
+from markdown.postprocessors import Postprocessor
from markdown.treeprocessors import Treeprocessor
-class BeautifulSoupWithCharEntities(BeautifulSoup):
- """
- Soup generator that elegantly handles reserved HTML entity placeholders.
-
- For instance, the soup HTMLparser replaces these (e.g. '<') into their
- unicode equivalents (e.g. '<').
-
- This messes up things if the HTML is decoded into a string again.
- """
-
- def __init__(self, /, markup, features="html.parser", **kwargs):
- markup = markup.replace("&", "&")
-
- super().__init__(markup=markup, features=features, **kwargs)
+class BS4Extension(Extension):
+ def extendMarkdown(self, md): # noqa: N802
+ md.postprocessors.register(BS4Postprocessor(md), "bs4_extension", 0)
- def decode(self, **kwargs):
- # Prevent entity subsitution (e.g. "&" -> "&")
- kwargs["formatter"] = None
- return super().decode(**kwargs)
+class BS4Postprocessor(Postprocessor):
+ def run(self, text):
+ soup = BeautifulSoup(text, "html.parser")
-class ExtendHTMLTagClasses:
- def __init__(self, tag_classes):
- # Make extensions safe
- self.clean_tag_classes = {
- t: [escape(c).strip() for c in classes]
- for t, classes in tag_classes.items()
+ class_map = {
+ "img": ["img-fluid"],
+ "blockquote": ["blockquote"],
+ "table": ["table", "table-hover", "table-borderless"],
+ "thead": ["thead-light"],
+ "code": ["codehilite"],
}
- def __call__(self, html):
- input_is_safe = isinstance(html, SafeString)
-
- soup = BeautifulSoupWithCharEntities(markup=html)
-
- tags = [*self.clean_tag_classes.keys()]
- if tags:
- for element in soup.find_all(tags):
- classes = element.get("class", [])
- for new_class in self.clean_tag_classes[element.name]:
- if new_class not in classes:
- classes.append(new_class)
- element["class"] = classes
-
- new_markup = soup.decode()
-
- if input_is_safe:
- new_markup = mark_safe(new_markup)
+ for element in soup.find_all([*class_map.keys()]):
+ classes = element.get("class", [])
+ for new_class in class_map[element.name]:
+ if new_class not in classes:
+ classes.append(new_class)
+ element["class"] = classes
- return new_markup
+ return str(soup)
class LinkBlankTargetExtension(Extension):
diff --git a/app/tests/core_tests/test_markdown.py b/app/tests/core_tests/test_markdown.py
index 91025fbde1..40e3bbc238 100644
--- a/app/tests/core_tests/test_markdown.py
+++ b/app/tests/core_tests/test_markdown.py
@@ -1,10 +1,9 @@
import textwrap
import pytest
-from django.utils.safestring import SafeString, mark_safe
+from markdown import markdown
from grandchallenge.core.templatetags.bleach import md2html
-from grandchallenge.core.utils.markdown import ExtendHTMLTagClasses
@pytest.mark.parametrize(
@@ -29,7 +28,7 @@ def test_function():
),
textwrap.dedent(
"""\
-
+
Quote Me
@@ -122,10 +121,10 @@ def test_function():
),
textwrap.dedent(
"""\
-
-
-
-
+
+
+
+
Quote Me
@@ -184,17 +183,17 @@ def test_function():
texta subscript
"""
),
),
(
"<script>alert("foo")</script>",
- "<script>alert("foo")</script>
",
+ '<script>alert("foo")</script>
',
),
(
"[![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)",
- '
',
+ '
',
),
),
)
@@ -204,57 +203,39 @@ def test_markdown_rendering(markdown_with_html, expected_output):
@pytest.mark.parametrize(
- "html, tag_classes, expected_output, is_safe",
+ "html, expected_output",
[
- ( # Safe input
- mark_safe("Content
"),
- {},
+ ( # Unaffected element
"Content
",
- True,
- ),
- ( # Unsafe input
- "Content
",
- {},
"Content
",
- False,
),
- ( # Escaped classes
- mark_safe("Content
"),
- {"div": ['']},
- 'Content
',
- True,
+ ( # With Markdown
+ "> Content",
+ '\nContent
\n
',
),
( # Empty class
- 'Content
',
- {"div": ["foo"]},
- 'Content
',
- False,
+ 'Content
',
+ 'Content
',
),
( # Existing class
- 'Content
',
- {"div": ["foo"]},
- 'Content
',
- False,
+ 'Content
',
+ 'Content
',
),
( # Extension class already present
- 'Content
',
- {"div": ["foo"]},
- 'Content
',
- False,
+ 'Content
',
+ 'Content
',
),
( # Existing class + extension class
- 'Content
',
- {"div": ["foo"]},
- 'Content
',
- False,
+ 'Content
',
+ 'Content
',
),
],
)
-def test_extend_html_tag_classes(html, tag_classes, expected_output, is_safe):
- extender = ExtendHTMLTagClasses(tag_classes)
- output = extender(html)
+def test_extend_html_tag_classes(html, expected_output, settings):
+ output = markdown(
+ text=html,
+ extensions=settings.MARKDOWNX_MARKDOWN_EXTENSIONS,
+ extension_configs=settings.MARKDOWNX_MARKDOWN_EXTENSION_CONFIGS,
+ )
assert output == expected_output
-
- # Check if the output matches the expected safety status
- assert isinstance(output, SafeString) == is_safe
From a08d295c1a37123053204321060b213b51856a49 Mon Sep 17 00:00:00 2001
From: James Meakin <12661555+jmsmkn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 11:35:36 +0100
Subject: [PATCH 09/12] Revert expected text
---
.../core_tests/test_tag_substitutions.py | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/app/tests/core_tests/test_tag_substitutions.py b/app/tests/core_tests/test_tag_substitutions.py
index 3ae6208ee9..9d76d73287 100644
--- a/app/tests/core_tests/test_tag_substitutions.py
+++ b/app/tests/core_tests/test_tag_substitutions.py
@@ -1,5 +1,3 @@
-import textwrap
-
import pytest
from django.utils.safestring import SafeString, mark_safe
@@ -133,13 +131,16 @@ def test_no_change_with_no_tag_or_arg_match(content):
assert s == content
-EXPECTED_YOUTUBE_EMBED = textwrap.dedent(
- """\
-
-
-
- """
-)
+EXPECTED_YOUTUBE_EMBED = """
+
+
+"""
@pytest.mark.parametrize(
From 910c14594be5dc25fb345658b4bc20bb2fc2a08b Mon Sep 17 00:00:00 2001
From: James Meakin <12661555+jmsmkn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 11:41:55 +0100
Subject: [PATCH 10/12] Add a test for mixed content
---
app/tests/core_tests/test_markdown.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/app/tests/core_tests/test_markdown.py b/app/tests/core_tests/test_markdown.py
index 40e3bbc238..c04275ba09 100644
--- a/app/tests/core_tests/test_markdown.py
+++ b/app/tests/core_tests/test_markdown.py
@@ -213,6 +213,11 @@ def test_markdown_rendering(markdown_with_html, expected_output):
"> Content",
'\nContent
\n
',
),
+ ( # Mixed content
+ "> Markdown Content\n"
+ "HTML Content
",
+ '\nMarkdown Content
\n
\nHTML Content
',
+ ),
( # Empty class
'Content
',
'Content
',
From 1f6a64b791b50d6a2192a70121ef36d34bf3b6a6 Mon Sep 17 00:00:00 2001
From: James Meakin <12661555+jmsmkn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 11:52:34 +0100
Subject: [PATCH 11/12] Add spacing to code blocks
---
app/grandchallenge/core/static/css/base.scss | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/app/grandchallenge/core/static/css/base.scss b/app/grandchallenge/core/static/css/base.scss
index f9a8e6bd46..c8fe073f46 100644
--- a/app/grandchallenge/core/static/css/base.scss
+++ b/app/grandchallenge/core/static/css/base.scss
@@ -50,3 +50,7 @@ blockquote {
border-left: $spacer * .25 solid $primary;
color: $primary;
}
+
+div.codehilite {
+ margin-bottom: $paragraph-margin-bottom;
+}
From 3ea67df0da46755d3f405ef2c24e366e7a3bb18f Mon Sep 17 00:00:00 2001
From: James Meakin <12661555+jmsmkn@users.noreply.github.com>
Date: Wed, 8 Jan 2025 12:53:22 +0100
Subject: [PATCH 12/12] Fix test
---
app/tests/pages_tests/test_pages.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/app/tests/pages_tests/test_pages.py b/app/tests/pages_tests/test_pages.py
index 7b390ae788..09931fdae7 100644
--- a/app/tests/pages_tests/test_pages.py
+++ b/app/tests/pages_tests/test_pages.py
@@ -107,8 +107,8 @@ def test_page_create(client, two_challenge_sets):
response = get_view_for_user(url=response.url, client=client)
assert response.status_code == 200
assert (
- 'HELLO WORLD
'
- in str(response.content)
+ 'HELLO WORLD
'
+ in response.content.decode("utf-8")
)
# Check that it was created in the correct challenge
response = get_view_for_user(