Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix buggy code blocks in markdown renderer #3764

Merged
merged 13 commits into from
Jan 8, 2025
6 changes: 5 additions & 1 deletion app/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,11 @@ def get_private_ip():
MARKDOWNX_MARKDOWNIFY_FUNCTION = (
"grandchallenge.core.templatetags.bleach.md2html"
)
MARKDOWNX_MARKDOWN_EXTENSION_CONFIGS = {}
MARKDOWNX_MARKDOWN_EXTENSION_CONFIGS = {
"markdown.extensions.codehilite": {
"wrapcode": False,
}
}
MARKDOWNX_IMAGE_MAX_SIZE = {"size": (2000, 0), "quality": 90}
MARKDOWNX_EDITOR_RESIZABLE = "False"

Expand Down
4 changes: 4 additions & 0 deletions app/grandchallenge/core/static/css/base.scss
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,7 @@ blockquote {
border-left: $spacer * .25 solid $primary;
color: $primary;
}

div.codehilite {
margin-bottom: $paragraph-margin-bottom;
}
67 changes: 20 additions & 47 deletions app/grandchallenge/core/utils/markdown.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,34 @@
from xml.etree import ElementTree

from bs4 import BeautifulSoup, Tag
from bs4 import BeautifulSoup
from markdown import Extension
from markdown.postprocessors import Postprocessor
from markdown.treeprocessors import Treeprocessor


class BS4Extension(Extension):
def extendMarkdown(self, md): # noqa: N802
md.registerExtension(self)
md.treeprocessors.register(BS4Treeprocessor(md), "bs4_extension", 0)


class BS4Treeprocessor(Treeprocessor):
def run(self, root):
el_class_dict = {
"img": "img-fluid",
"blockquote": "blockquote",
"table": "table table-hover table-borderless",
"thead": "thead-light",
"code": "codehilite",
}
md.postprocessors.register(BS4Postprocessor(md), "bs4_extension", 0)

for el in root.iter():
if el.tag in el_class_dict:
self.set_css_class(
element=el, class_name=el_class_dict[el.tag]
)

for i, html_block in enumerate(self.md.htmlStash.rawHtmlBlocks):
bs4block = BeautifulSoup(html_block, "html.parser")

for tag, tag_class in el_class_dict.items():
for el in bs4block.find_all(tag):
self.set_css_class(element=el, class_name=tag_class)
self.md.htmlStash.rawHtmlBlocks[i] = str(bs4block)

@staticmethod
def set_css_class(*, element, class_name):
if isinstance(element, ElementTree.Element):
current_class = element.attrib.get("class", "")
class BS4Postprocessor(Postprocessor):
def run(self, text):
soup = BeautifulSoup(text, "html.parser")

if class_name not in current_class:
new_class = f"{current_class} {class_name}".strip()
element.set("class", new_class)

elif isinstance(element, Tag):
if "class" not in element.attrs:
element.attrs["class"] = []

current_class = element["class"]
class_map = {
"img": ["img-fluid"],
"blockquote": ["blockquote"],
"table": ["table", "table-hover", "table-borderless"],
"thead": ["thead-light"],
"code": ["codehilite"],
}

for name in class_name.split(" "):
if class_name not in current_class:
current_class.append(name)
for element in soup.find_all([*class_map.keys()]):
classes = element.get("class", [])
for new_class in class_map[element.name]:
if new_class not in classes:
classes.append(new_class)
element["class"] = classes

else:
raise TypeError("Unsupported element")
return str(soup)


class LinkBlankTargetExtension(Extension):
Expand Down
93 changes: 42 additions & 51 deletions app/tests/core_tests/test_markdown.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import textwrap

import pytest
from django.conf import settings
from markdown import markdown

from grandchallenge.core.templatetags.bleach import md2html
from grandchallenge.core.utils.markdown import BS4Treeprocessor


@pytest.mark.parametrize(
Expand Down Expand Up @@ -55,9 +53,9 @@ def test_function():
</tr>
</tbody>
</table>
<div class="codehilite"><pre><span></span><code class="codehilite"><span class="k">def</span><span class="w"> </span><span class="nf">test_function</span><span class="p">():</span>
<div class="codehilite"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">test_function</span><span class="p">():</span>
<span class="k">pass</span>
</code></pre></div>"""
</pre></div>"""
),
),
(
Expand Down Expand Up @@ -136,7 +134,6 @@ def test_function():
<blockquote class="ml-3 blockquote">
<p>Quote Me Existing Class</p>
</blockquote>

<table class="table table-hover table-borderless">
<thead class="thead-light">
<tr>
Expand Down Expand Up @@ -167,7 +164,6 @@ def test_function():
<tbody>
</tbody>
</table>

<table class="ml-3 table table-hover table-borderless">
<thead class="ml-3 thead-light">
<tr>
Expand All @@ -177,23 +173,28 @@ def test_function():
<tbody>
</tbody>
</table>

<div class="codehilite"><pre><span></span><code class="codehilite"><span class="k">def</span><span class="w"> </span><span class="nf">test_function</span><span class="p">():</span>
<div class="codehilite"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">test_function</span><span class="p">():</span>
<span class="k">pass</span>
</code></pre></div>

</pre></div>
<div><pre><code class="codehilite">no class</code></pre></div>
<div class="ml-3"><pre><code class="ml-3 codehilite">existing class</code></pre></div>

<p><del>Delete me</del></p>
<p>CH<sub>3</sub>CH<sub>2</sub>OH
text<sub>a subscript</sub></p>
<ul>
<li>Just paste links directly in the document like this: <a href="https://google.com">https://google.com</a>.</li>
<li>Or even an email address: <a href="&#109;&#97;&#105;&#108;&#116;&#111;&#58;&#102;&#97;&#107;&#101;&#46;&#101;&#109;&#97;&#105;&#108;&#64;&#101;&#109;&#97;&#105;&#108;&#46;&#99;&#111;&#109;">&#102;&#97;&#107;&#101;&#46;&#101;&#109;&#97;&#105;&#108;&#64;&#101;&#109;&#97;&#105;&#108;&#46;&#99;&#111;&#109;</a>.</li>
<li>Or even an email address: <a href="mailto:fake.email@email.com">fake.email@email.com</a>.</li>
</ul>"""
),
),
(
"&lt;script&gt;alert(&quot;foo&quot;)&lt;/script&gt;",
'<p>&lt;script&gt;alert("foo")&lt;/script&gt;</p>',
),
(
"[![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)",
'<p><a href="https://google.com"><img class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg"></a></p>',
),
),
)
def test_markdown_rendering(markdown_with_html, expected_output):
Expand All @@ -202,54 +203,44 @@ def test_markdown_rendering(markdown_with_html, expected_output):


@pytest.mark.parametrize(
"markdown_with_html, expected_output",
jmsmkn marked this conversation as resolved.
Show resolved Hide resolved
(
(
"""<img src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
[![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
"""<p><img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
<a href="https://google.com"><img alt="" class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg" /></a></p>""",
"html, expected_output",
[
( # Unaffected element
"<div>Content</div>",
"<div>Content</div>",
),
(
"""<img class="" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
[![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
"""<p><img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
<a href="https://google.com"><img alt="" class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg" /></a></p>""",
( # With Markdown
"> Content",
'<blockquote class="blockquote">\n<p>Content</p>\n</blockquote>',
),
(
"""<img class="ml-2" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
[![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
"""<p><img class="ml-2 img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
<a href="https://google.com"><img alt="" class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg" /></a></p>""",
( # Mixed content
"> Markdown Content\n"
"<blockquote class=>HTML Content</blockquote>",
'<blockquote class="blockquote">\n<p>Markdown Content</p>\n</blockquote>\n<blockquote class="blockquote">HTML Content</blockquote>',
),
(
"""<img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
[![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
"""<p><img class="img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
<a href="https://google.com"><img alt="" class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg" /></a></p>""",
( # Empty class
'<blockquote class="">Content</blockquote>',
'<blockquote class="blockquote">Content</blockquote>',
),
(
"""<img class="ml-2 img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
[![](http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg)](https://google.com)""",
"""<p><img class="ml-2 img-fluid" src="https://rumc-gcorg-p-public.s3.amazonaws.com/i/2023/10/20/042179f0-ad8c-4c0b-af54-7e81ba389a90.jpeg"/>
<a href="https://google.com"><img alt="" class="img-fluid" src="http://minio.localhost:9000/grand-challenge-public/i/2024/08/06/77c8d999-c22b-4983-8558-8e1fa364cd2c.jpg" /></a></p>""",
( # Existing class
'<blockquote class="ml-2">Content</blockquote>',
'<blockquote class="ml-2 blockquote">Content</blockquote>',
),
),
( # Extension class already present
'<blockquote class="blockquote">Content</blockquote>',
'<blockquote class="blockquote">Content</blockquote>',
),
( # Existing class + extension class
'<blockquote class="ml-2 blockquote">Content</blockquote>',
'<blockquote class="ml-2 blockquote">Content</blockquote>',
),
],
)
def test_setting_class_to_html_img_within_markdown(
markdown_with_html, expected_output
):
def test_extend_html_tag_classes(html, expected_output, settings):
output = markdown(
text=markdown_with_html,
text=html,
extensions=settings.MARKDOWNX_MARKDOWN_EXTENSIONS,
extension_configs=settings.MARKDOWNX_MARKDOWN_EXTENSION_CONFIGS,
)

assert output == expected_output


def test_tree_processor_set_css_class_type_error():
with pytest.raises(TypeError):
BS4Treeprocessor.set_css_class(
element="element", class_name="img-fluid"
)
4 changes: 2 additions & 2 deletions app/tests/pages_tests/test_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ def test_page_create(client, two_challenge_sets):
response = get_view_for_user(url=response.url, client=client)
assert response.status_code == 200
assert (
'<h1 id="hello-world">HELLO WORLD<a class="headerlink text-muted small pl-1" href="#hello-world" title="Permanent link">&para;</a></h1>'
in str(response.content)
'<h1 id="hello-world">HELLO WORLD<a class="headerlink text-muted small pl-1" href="#hello-world" title="Permanent link">¶</a></h1>'
in response.content.decode("utf-8")
)
# Check that it was created in the correct challenge
response = get_view_for_user(
Expand Down
Loading