Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions openedx/core/djangolib/markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Utilities for use in Mako markup.
"""


import markupsafe
import nh3
from lxml.html.clean import Cleaner
Expand All @@ -11,10 +10,10 @@
# Text() can be used to declare a string as plain text, as HTML() is used
# for HTML. It simply wraps markupsafe's escape, which will HTML-escape if
# it isn't already escaped.
Text = markupsafe.escape # pylint: disable=invalid-name
Text = markupsafe.escape # pylint: disable=invalid-name


def HTML(html): # pylint: disable=invalid-name
def HTML(html): # pylint: disable=invalid-name
"""
Mark a string as already HTML, so that it won't be escaped before output.

Expand Down Expand Up @@ -53,7 +52,7 @@ def strip_all_tags_but_br(string_to_strip):
string_to_strip = ""

string_to_strip = decode.utf8(string_to_strip)
string_to_strip = nh3.clean(string_to_strip, tags={'br'})
string_to_strip = nh3.clean(string_to_strip, tags={"br"})

return HTML(string_to_strip)

Expand Down
59 changes: 29 additions & 30 deletions openedx/core/djangolib/tests/test_markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Tests for openedx.core.djangolib.markup
"""


import unittest

import ddt
Expand All @@ -22,7 +21,7 @@ class FormatHtmlTest(unittest.TestCase):
("hello", "hello"),
("<hello>", "&lt;hello&gt;"),
("It's cool", "It&#39;s cool"),
('"cool," she said.', '&#34;cool,&#34; she said.'),
('"cool," she said.', "&#34;cool,&#34; she said."),
("Stop & Shop", "Stop &amp; Shop"),
("<a>нтмℓ-єѕ¢αρє∂</a>", "&lt;a&gt;нтмℓ-єѕ¢αρє∂&lt;/a&gt;"),
)
Expand Down Expand Up @@ -59,18 +58,18 @@ def test_mako(self):
%>
${Text(_(u"A & {BC}")).format(BC=HTML("B & C"))}
""",
default_filters=['decode.utf8', 'h'],
default_filters=["decode.utf8", "h"],
)
out = template.render()
assert out.strip() == 'A &amp; B & C'
assert out.strip() == "A &amp; B & C"

def test_ungettext(self):
    """Plural translations wrapped in Text() escape '&' but keep HTML() arguments raw."""
    for i in (1, 2):
        # ngettext selects the singular or plural template based on i.
        template = ngettext("1 & {}", "2 & {}", i)
        out = Text(template).format(HTML("<>"))
        assert out == f"{i} &amp; <>"

def test_strip_all_tags_but_br_filter(self):
""" Verify filter removes every tags except br """
"""Verify filter removes every tags except br"""
template = Template(
"""
<%page expression_filter="h"/>
Expand All @@ -82,20 +81,20 @@ def test_strip_all_tags_but_br_filter(self):
)
rendered_template = template.render()

assert '<br>' in rendered_template
assert '<script>' not in rendered_template
assert "<br>" in rendered_template
assert "<script>" not in rendered_template

def test_strip_all_tags_but_br_returns_html(self):
    """
    Verify the filter result behaves as an HTML-safe string: values
    formatted into it afterwards are escaped, while the kept <br> is not.
    """
    stripped = strip_all_tags_but_br("{name}<br><script>")
    formatted = stripped.format(name="Rock & Roll")
    assert str(formatted) == "Rock &amp; Roll<br>"

def test_clean_dengers_html_filter(self):
""" Verify filter removes expected tags """
"""Verify filter removes expected tags"""
template = Template(
"""
<%page expression_filter="h"/>
Expand Down Expand Up @@ -139,21 +138,21 @@ def test_clean_dengers_html_filter(self):
"""
)
rendered_template = template.render()
html_soup = BeautifulSoup(rendered_template, 'html.parser')

assert html_soup.find('a')
assert html_soup.find('div')
assert html_soup.find('div', attrs={'style': 'display: none'})
assert html_soup.find('p')
assert html_soup.find('img')

assert not html_soup.find('a', attrs={'onclick': 'evil_function()'})
assert not html_soup.find('html')
assert not html_soup.find('head')
assert not html_soup.find('script')
assert not html_soup.find('style')
assert not html_soup.find('link')
assert not html_soup.find('iframe')
assert not html_soup.find('form')
assert not html_soup.find('blink')
assert not html_soup.find('object')
html_soup = BeautifulSoup(rendered_template, "html.parser")

assert html_soup.find("a")
assert html_soup.find("div")
assert html_soup.find("div", attrs={"style": "display: none"})
assert html_soup.find("p")
assert html_soup.find("img")

assert not html_soup.find("a", attrs={"onclick": "evil_function()"})
assert not html_soup.find("html")
assert not html_soup.find("head")
assert not html_soup.find("script")
assert not html_soup.find("style")
assert not html_soup.find("link")
assert not html_soup.find("iframe")
assert not html_soup.find("form")
assert not html_soup.find("blink")
assert not html_soup.find("object")
35 changes: 22 additions & 13 deletions openedx/core/lib/safe_lxml/xmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@
#
"""lxml.etree protection"""

from __future__ import print_function, absolute_import
from __future__ import absolute_import, print_function

import threading

from lxml import etree as _etree

from defusedxml.common import DTDForbidden, EntitiesForbidden, NotSupportedError
from lxml import etree as _etree

LXML3 = _etree.LXML_VERSION[0] >= 3

Expand All @@ -18,8 +17,7 @@


class RestrictedElement(_etree.ElementBase):
"""A restricted Element class that filters out instances of some classes
"""
"""A restricted Element class that filters out instances of some classes"""

__slots__ = ()
blacklist = (_etree._Entity, _etree._ProcessingInstruction, _etree._Comment) # pylint: disable=protected-access
Expand All @@ -36,19 +34,25 @@ def __iter__(self):
return self._filter(iterator)

def iterchildren(self, tag=None, reversed=False):  # pylint: disable=redefined-builtin
    """Iterate over child elements, passing them through ``self._filter``.

    ``tag`` and ``reversed`` are forwarded unchanged to the parent class's
    ``iterchildren``. ``_filter`` is defined outside this view; per the
    class docstring it drops instances of some classes.
    """
    children = super(RestrictedElement, self).iterchildren(  # pylint: disable=super-with-arguments
        tag=tag,
        reversed=reversed,
    )
    filtered = self._filter(children)
    return filtered

def iter(self, tag=None, *tags):  # pylint: disable=keyword-arg-before-vararg
    """Iterate over this element's tree, passing results through ``self._filter``.

    Args:
        tag: first tag selector, or None for no tag restriction.
        *tags: additional tag selectors, forwarded to the parent ``iter``.

    Returns:
        Whatever ``self._filter`` yields for the parent class's iterator.
    """
    # Forward ``tag`` positionally. The previous ``iter(tag=tag, *tags)`` bound
    # the first element of ``tags`` to the ``tag`` parameter and then collided
    # with the ``tag=`` keyword, raising TypeError whenever extra tags were given.
    iterator = super(RestrictedElement, self).iter(tag, *tags)  # pylint: disable=super-with-arguments
    return self._filter(iterator)

def iterdescendants(self, tag=None, *tags):  # pylint: disable=keyword-arg-before-vararg
    """Iterate over descendants, passing results through ``self._filter``.

    Args:
        tag: first tag selector, or None for no tag restriction.
        *tags: additional tag selectors, forwarded to the parent
            ``iterdescendants``.

    Returns:
        Whatever ``self._filter`` yields for the parent class's iterator.
    """
    # Forward ``tag`` positionally. ``iterdescendants(tag=tag, *tags)`` bound the
    # first element of ``tags`` to ``tag`` and then collided with the keyword,
    # raising TypeError whenever extra tags were given.
    iterator = super(RestrictedElement, self).iterdescendants(  # pylint: disable=super-with-arguments
        tag, *tags
    )
    return self._filter(iterator)

def itersiblings(self, tag=None, preceding=False):
    """Iterate over sibling elements, passing them through ``self._filter``.

    ``tag`` and ``preceding`` are forwarded unchanged to the parent class's
    ``itersiblings``.
    """
    siblings = super(RestrictedElement, self).itersiblings(  # pylint: disable=super-with-arguments
        tag=tag,
        preceding=preceding,
    )
    filtered = self._filter(siblings)
    return filtered

def getchildren(self):
Expand All @@ -61,8 +65,7 @@ def getiterator(self, tag=None):


class GlobalParserTLS(threading.local):
"""Thread local context for custom parser instances
"""
"""Thread local context for custom parser instances"""

parser_config = {
"resolve_entities": False,
Expand Down Expand Up @@ -104,7 +107,9 @@ def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True):
raise DTDForbidden(docinfo.doctype, docinfo.system_url, docinfo.public_id)
if forbid_entities and not LXML3:
# lxml < 3 has no iterentities()
raise NotSupportedError("Unable to check for entity declarations " "in lxml 2.x") # pylint: disable=implicit-str-concat
raise NotSupportedError(
"Unable to check for entity declarations in lxml 2.x"
) # pylint: disable=implicit-str-concat

if forbid_entities:
for dtd in docinfo.internalDTD, docinfo.externalDTD:
Expand All @@ -114,15 +119,19 @@ def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True):
raise EntitiesForbidden(entity.name, entity.content, None, None, None, None)


def parse(source, parser=None, base_url=None, forbid_dtd=False, forbid_entities=True):
    """Parse ``source`` with lxml and run ``check_docinfo`` on the result.

    Args:
        source: passed straight to ``lxml.etree.parse``.
        parser: parser to use; when None, ``getDefaultParser()`` supplies one.
        base_url: forwarded to ``lxml.etree.parse``.
        forbid_dtd: forwarded to ``check_docinfo``.
        forbid_entities: forwarded to ``check_docinfo``.

    Returns:
        The element tree produced by ``lxml.etree.parse``.

    Raises:
        DTDForbidden, EntitiesForbidden, NotSupportedError: propagated from
            ``check_docinfo`` when the document fails its checks.
    """
    active_parser = getDefaultParser() if parser is None else parser
    tree = _etree.parse(source, active_parser, base_url=base_url)
    # Validate after parsing; check_docinfo raises rather than returning a status.
    check_docinfo(tree, forbid_dtd, forbid_entities)
    return tree


def fromstring(text, parser=None, base_url=None, forbid_dtd=False, forbid_entities=True): # pylint: disable=missing-function-docstring
def fromstring(
text, parser=None, base_url=None, forbid_dtd=False, forbid_entities=True
): # pylint: disable=missing-function-docstring
if parser is None:
parser = getDefaultParser()
rootelement = _etree.fromstring(text, parser, base_url=base_url)
Expand Down
4 changes: 3 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ python_files = tests.py test_*.py tests_*.py *_tests.py __init__.py
# http://nedbatchelder.com/blog/200711/rethrowing_exceptions_in_python.html
# It's a little unusual, but we have good reasons for doing so, so we disable
# this rule.
# E203: whitespace before ':'
# We ignore this because black's default formatting puts whitespace before ':' in slices
# E305,E402,E722,E731,E741,E743,W503,W504: errors and warnings added since pep8/pycodestyle
# 1.5.7 that we haven't cleaned up yet
ignore=E265,E266,E305,E402,E501,E722,E731,E741,E743,W503,W504,W602
ignore=E203,E265,E266,E305,E402,E501,E722,E731,E741,E743,W503,W504,W602
exclude=migrations,.git,.pycharm_helpers,.tox,test_root/staticfiles,node_modules

[isort]
Expand Down
Loading
Loading