Skip to content

Commit

Permalink
chore: replace bleach with nh3
Browse files Browse the repository at this point in the history
  • Loading branch information
irtazaakram committed May 28, 2024
1 parent 63e940d commit f7229e0
Show file tree
Hide file tree
Showing 25 changed files with 92 additions and 62 deletions.
6 changes: 3 additions & 3 deletions lms/djangoapps/certificates/views/support.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import urllib
from functools import wraps

import bleach
import nh3
from django.db import transaction
from django.db.models import Q
from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseForbidden, HttpResponseServerError
Expand Down Expand Up @@ -88,8 +88,8 @@ def search_certificates(request):
]
"""
unbleached_filter = urllib.parse.unquote(urllib.parse.quote_plus(request.GET.get("user", "")))
user_filter = bleach.clean(unbleached_filter)
uncleaned_filter = urllib.parse.unquote(urllib.parse.quote_plus(request.GET.get("user", "")))
user_filter = nh3.clean(uncleaned_filter)
if not user_filter:
msg = _("user is not given.")
return HttpResponseBadRequest(msg)
Expand Down
4 changes: 2 additions & 2 deletions lms/djangoapps/courseware/views/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from datetime import datetime
from urllib.parse import quote_plus, urlencode, urljoin, urlparse, urlunparse

import bleach
import nh3
import requests
from django.conf import settings
from django.contrib.auth.decorators import login_required
Expand Down Expand Up @@ -1550,7 +1550,7 @@ def render_xblock(request, usage_key_string, check_if_enrolled=True, disable_sta
requested_view = request.GET.get('view', 'student_view')
if requested_view != 'student_view' and requested_view != 'public_view': # lint-amnesty, pylint: disable=consider-using-in
return HttpResponseBadRequest(
f"Rendering of the xblock view '{bleach.clean(requested_view, strip=True)}' is not supported."
f"Rendering of the xblock view '{nh3.clean(requested_view)}' is not supported."
)

staff_access = has_access(request.user, 'staff', course_key)
Expand Down
2 changes: 1 addition & 1 deletion lms/djangoapps/discussion/django_comment_client/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1038,7 +1038,7 @@ def sanitize_body(body):
This is possibly overly broad, and might tamper with legitimate posts that
contain this code in fenced code blocks. As far as we can tell, this is an
extra layer of protection, and current handling in the front end and using
bleach for HTML rendering on the server side should cover these cases.
nh3 for HTML rendering on the server side should cover these cases.
"""
if not body:
return body
Expand Down
18 changes: 8 additions & 10 deletions lms/djangoapps/discussion/rest_api/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,16 @@
Note that this module is designed to imitate the front end behavior as
implemented in Markdown.Sanitizer.js.
"""
import bleach
import nh3
import markdown

ALLOWED_TAGS = bleach.ALLOWED_TAGS | {
ALLOWED_TAGS = nh3.ALLOWED_TAGS | {
'br', 'dd', 'del', 'dl', 'dt', 'h1', 'h2', 'h3', 'h4', 'hr', 'img', 'kbd', 'p', 'pre', 's',
'strike', 'sub', 'sup', 'table', 'thead', 'th', 'tbody', 'tr', 'td', 'tfoot'
}
ALLOWED_PROTOCOLS = {"http", "https", "ftp", "mailto"}
ALLOWED_ATTRIBUTES = {
"a": ["href", "title", "target", "rel"],
"img": ["src", "alt", "title", "width", "height"],
"a": {"href", "title", "target", "rel"},
"img": {"src", "alt", "title", "width", "height"},
}


Expand All @@ -25,17 +24,16 @@ def render_body(raw_body):
This includes the following steps:
* Convert Markdown to HTML
* Sanitise HTML using bleach
* Sanitise HTML using nh3
Note that this does not prevent Markdown syntax inside a MathJax block from
being processed, which the forums JavaScript code does.
"""
rendered_html = markdown.markdown(raw_body)
sanitised_html = bleach.clean(
sanitised_html = nh3.clean(
rendered_html,
tags=ALLOWED_TAGS,
protocols=ALLOWED_PROTOCOLS,
strip=True,
attributes=ALLOWED_ATTRIBUTES
attributes=ALLOWED_ATTRIBUTES,
link_rel=None,
)
return sanitised_html
2 changes: 1 addition & 1 deletion lms/djangoapps/discussion/rest_api/tests/test_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def test_protocols_img_tag(self, protocol, is_allowed):

def test_script_tag(self):
raw_body = '<script type="text/javascript">alert("evil script");</script>'
assert render_body(raw_body) == 'alert("evil script");'
assert render_body(raw_body) == ''

@ddt.data(
("br", '<p>foo<br>bar</p>'), # br is allowed inside p
Expand Down
4 changes: 2 additions & 2 deletions lms/templates/courseware/progress_graph.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<%page args="grade_summary, grade_cutoffs, graph_div_id, show_grade_breakdown = True, show_grade_cutoffs = True, **kwargs"/>
<%!
import bleach
import nh3
import json
import math
import six
Expand Down Expand Up @@ -74,7 +74,7 @@ $(function () {
## allowing the display of such images, and remove any previously stored HTML
## to prevent ugly HTML from being shown to learners.
## xss-lint: disable=javascript-jquery-append
ticks.append( [tickIndex, bleach.clean(section['label'], tags=set(), strip=True)] )
ticks.append( [tickIndex, nh3.clean(section['label'], tags=set())] )

if section['category'] in detail_tooltips:
## xss-lint: disable=javascript-jquery-append
Expand Down
2 changes: 1 addition & 1 deletion lms/templates/lti.html
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ <h4 class="hd hd-4 error_message">
% if has_score and comment:
<h4 class="hd hd-4 problem-feedback-label">${_("Feedback on your work from the grader:")}</h4>
<div class="problem-feedback">
## sanitized with bleach in view
## sanitized with nh3 in view
${comment | n, decode.utf8}
</div>
% endif
Expand Down
4 changes: 2 additions & 2 deletions openedx/core/djangoapps/debug/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""


import bleach
import nh3
from django.http import HttpResponseNotFound
from django.template import TemplateDoesNotExist
from django.utils.translation import gettext as _
Expand Down Expand Up @@ -54,4 +54,4 @@ def show_reference_template(request, template):

return render_to_response(template, context)
except TemplateDoesNotExist:
return HttpResponseNotFound(f'Missing template {bleach.clean(template, strip=True)}')
return HttpResponseNotFound(f'Missing template {nh3.clean(template)}')
4 changes: 2 additions & 2 deletions openedx/core/djangoapps/user_authn/views/logout.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import urllib.parse as parse # pylint: disable=import-error
from urllib.parse import parse_qs, urlsplit, urlunsplit # pylint: disable=import-error

import bleach
import nh3
from django.conf import settings
from django.contrib.auth import logout
from django.shortcuts import redirect
Expand Down Expand Up @@ -60,7 +60,7 @@ def target(self):
# >> /courses/course-v1:ARTS+D1+2018_T/course/
# to handle this scenario we need to encode our URL using quote_plus and then unquote it again.
if target_url:
target_url = bleach.clean(parse.unquote(parse.quote_plus(target_url)))
target_url = nh3.clean(parse.unquote(parse.quote_plus(target_url)))

use_target_url = target_url and is_safe_login_or_logout_redirect(
redirect_to=target_url,
Expand Down
4 changes: 2 additions & 2 deletions openedx/core/djangoapps/user_authn/views/tests/test_logout.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import urllib
from unittest import mock
import ddt
import bleach
import nh3
from django.conf import settings
from django.test import TestCase
from django.test.utils import override_settings
Expand Down Expand Up @@ -237,6 +237,6 @@ def test_logout_redirect_failure_with_xss_vulnerability(self, redirect_url, host
)
response = self.client.get(url, HTTP_HOST=host)
expected = {
'target': bleach.clean(urllib.parse.unquote(redirect_url)),
'target': nh3.clean(urllib.parse.unquote(redirect_url)),
}
self.assertDictContainsSubset(expected, response.context_data)
4 changes: 2 additions & 2 deletions openedx/core/djangolib/markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


import markupsafe
import bleach
import nh3
from lxml.html.clean import Cleaner
from mako.filters import decode

Expand Down Expand Up @@ -53,7 +53,7 @@ def strip_all_tags_but_br(string_to_strip):
string_to_strip = ""

string_to_strip = decode.utf8(string_to_strip)
string_to_strip = bleach.clean(string_to_strip, tags={'br'}, strip=True)
string_to_strip = nh3.clean(string_to_strip, tags={'br'})

return HTML(string_to_strip)

Expand Down
3 changes: 2 additions & 1 deletion requirements/edx/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ billiard==4.2.0
# via celery
bleach[css]==6.1.0
# via
# -r requirements/edx/kernel.in
# edx-enterprise
# lti-consumer-xblock
# openedx-django-wiki
Expand Down Expand Up @@ -728,6 +727,8 @@ newrelic==9.9.1
# via
# -r requirements/edx/bundled.in
# edx-django-utils
nh3==0.2.17
# via -r requirements/edx/kernel.in
nltk==3.8.1
# via chem
nodeenv==1.8.0
Expand Down
4 changes: 4 additions & 0 deletions requirements/edx/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1259,6 +1259,10 @@ newrelic==9.9.1
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
# edx-django-utils
nh3==0.2.17
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
nltk==3.8.1
# via
# -r requirements/edx/doc.txt
Expand Down
2 changes: 2 additions & 0 deletions requirements/edx/doc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,8 @@ newrelic==9.9.1
# via
# -r requirements/edx/base.txt
# edx-django-utils
nh3==0.2.17
# via -r requirements/edx/base.txt
nltk==3.8.1
# via
# -r requirements/edx/base.txt
Expand Down
2 changes: 1 addition & 1 deletion requirements/edx/kernel.in
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ acid-xblock # This XBlock is used for unit tests as well
analytics-python # Used for Segment analytics
attrs # Reduces boilerplate code involving class attributes
Babel # Internationalization utilities, used for date formatting in a few places
bleach[css] # Allowed-list-based HTML sanitizing library that escapes or strips markup and attributes; used for capa and LTI
boto # Deprecated version of the AWS SDK; we should stop using this
boto3 # Amazon Web Services SDK for Python
botocore # via boto3, s3transfer
Expand Down Expand Up @@ -110,6 +109,7 @@ Markdown # Convert text markup to HTML; used in capa
meilisearch # Library to access Meilisearch search engine (will replace ElasticSearch)
mongoengine # Object-document mapper for MongoDB, used in the LMS dashboard
mysqlclient # Driver for the default production relational database
nh3 # Python bindings to the ammonia (whitelist-based HTML sanitizing library); used for capa and LTI
nodeenv # Utility for managing Node.js environments; we use this for deployments and testing
oauthlib # OAuth specification support for authenticating via LTI or other Open edX services
olxcleaner
Expand Down
2 changes: 2 additions & 0 deletions requirements/edx/testing.txt
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,8 @@ newrelic==9.9.1
# via
# -r requirements/edx/base.txt
# edx-django-utils
nh3==0.2.17
# via -r requirements/edx/base.txt
nltk==3.8.1
# via
# -r requirements/edx/base.txt
Expand Down
Empty file added uwsgi.ini
Empty file.
4 changes: 2 additions & 2 deletions xmodule/capa/inputtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
import time
from datetime import datetime

import bleach
import nh3
import html5lib
import pyparsing
import six
Expand Down Expand Up @@ -800,7 +800,7 @@ def setup_code_response_rendering(self):
if self.status == 'incomplete':
self.status = 'queued'
self.queue_len = self.msg # lint-amnesty, pylint: disable=attribute-defined-outside-init
self.msg = bleach.clean(self.submitted_msg)
self.msg = nh3.clean(self.submitted_msg)

def setup(self):
""" setup this input type """
Expand Down
4 changes: 2 additions & 2 deletions xmodule/capa/tests/test_inputtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,7 +916,7 @@ def test_matlab_queue_message_not_allowed_tag(self):
}
elt = etree.fromstring(self.xml)
the_input = self.input_class(test_capa_system(), elt, state)
expected = "&lt;script&gt;Test message&lt;/script&gt;"
expected = ""
assert the_input.queue_msg == expected

def test_matlab_sanitize_msg(self):
Expand All @@ -925,7 +925,7 @@ def test_matlab_sanitize_msg(self):
"""
not_allowed_tag = 'script'
self.the_input.msg = "<{0}>Test message</{0}>".format(not_allowed_tag)
expected = "&lt;script&gt;Test message&lt;/script&gt;"
expected = ""
assert self.the_input._get_render_context()['msg'] == expected # pylint: disable=protected-access


Expand Down
6 changes: 3 additions & 3 deletions xmodule/capa/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def test_compare_with_tolerance(self): # lint-amnesty, pylint: disable=too-many

def test_sanitize_html(self):
"""
Test for html sanitization with bleach.
Test for html sanitization with nh3.
"""
allowed_tags = ['div', 'p', 'audio', 'pre', 'span']
for tag in allowed_tags:
Expand All @@ -130,7 +130,7 @@ def test_sanitize_html(self):

not_allowed_tag = 'script'
queue_msg = "<{0}>Test message</{0}>".format(not_allowed_tag)
expected = "&lt;script&gt;Test message&lt;/script&gt;"
expected = ""
assert sanitize_html(queue_msg) == expected

def test_get_inner_html_from_xpath(self):
Expand All @@ -142,7 +142,7 @@ def test_get_inner_html_from_xpath(self):

def test_remove_markup(self):
"""
Test for markup removal with bleach.
Test for markup removal with nh3.
"""
assert remove_markup('The <mark>Truth</mark> is <em>Out There</em> & you need to <strong>find</strong> it') ==\
'The Truth is Out There &amp; you need to find it'
Expand Down
23 changes: 10 additions & 13 deletions xmodule/capa/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@
from cmath import isinf, isnan
from decimal import Decimal

import bleach
import nh3
from calc import evaluator
from lxml import etree

from bleach.css_sanitizer import CSSSanitizer
from openedx.core.djangolib.markup import HTML

#-----------------------------------------------------------------------------
Expand Down Expand Up @@ -182,17 +181,15 @@ def sanitize_html(html_code):
Used to sanitize XQueue responses from Matlab.
"""
attributes = bleach.ALLOWED_ATTRIBUTES.copy()
attributes = nh3.ALLOWED_ATTRIBUTES.copy()
attributes.update({
'*': ['class', 'style', 'id'],
'audio': ['controls', 'autobuffer', 'autoplay', 'src'],
'img': ['src', 'width', 'height', 'class']
'*': {'class', 'style', 'id'},
'audio': {'controls', 'autobuffer', 'autoplay', 'src'},
'img': {'src', 'width', 'height', 'class'}
})
output = bleach.clean(
output = nh3.clean(
html_code,
protocols=bleach.ALLOWED_PROTOCOLS | {'data'},
tags=bleach.ALLOWED_TAGS | {'div', 'p', 'audio', 'pre', 'img', 'span'},
css_sanitizer=CSSSanitizer(allowed_css_properties=["white-space"]),
tags=nh3.ALLOWED_TAGS | {'div', 'p', 'audio', 'pre', 'img', 'span'},
attributes=attributes
)
return output
Expand All @@ -215,12 +212,12 @@ def remove_markup(html):
"""
Return html with markup stripped and text HTML-escaped.
>>> bleach.clean("<b>Rock & Roll</b>", tags=set(), strip=True)
>>> nh3.clean("<b>Rock & Roll</b>", tags=set())
'Rock &amp; Roll'
>>> bleach.clean("<b>Rock &amp; Roll</b>", tags=set(), strip=True)
>>> nh3.clean("<b>Rock &amp; Roll</b>", tags=set())
'Rock &amp; Roll'
"""
return HTML(bleach.clean(html, tags=set(), strip=True))
return HTML(nh3.clean(html, tags=set()))


def get_course_id_from_capa_block(capa_block):
Expand Down
4 changes: 2 additions & 2 deletions xmodule/capa_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import sys
import traceback

from bleach.sanitizer import Cleaner
import nh3
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.utils.encoding import smart_str
Expand Down Expand Up @@ -619,7 +619,7 @@ def index_dictionary(self):
capa_content = re.sub(
r"(\s|&nbsp;|//)+",
" ",
Cleaner(tags=[], strip=True).clean(capa_content)
nh3.clean(capa_content, tags=set())
)

capa_body = {
Expand Down
Loading

0 comments on commit f7229e0

Please sign in to comment.