mirror of
https://github.com/aljazceru/CTFd.git
synced 2025-12-17 05:54:19 +01:00
Use pybluemonday instead of lxml for html sanitization (#1837)
* Use `pybluemonday` instead of `lxml` for html sanitization * Fix boolean optional configs in `config.py` * Closes #1835
This commit is contained in:
@@ -46,12 +46,16 @@
|
||||
|
||||
**Deployment**
|
||||
|
||||
- Fix boolean configs from the `config.ini` optional section
|
||||
- Install `python3-dev` instead of `python-dev` in apt
|
||||
- Bump lxml to 4.6.2
|
||||
- Require `pybluemonday` as pip dependency
|
||||
- Remove `lxml` and `html5lib` from pip dependencies
|
||||
- Bump pip-compile to 5.4.0
|
||||
|
||||
**Miscellaneous**
|
||||
|
||||
- Rewrite the HTML santiziation feature (controlled by `HTML_SANITIZATION`) to use the pybluemonday library instead of lxml/html5lib
|
||||
- Note that this feature is still in beta
|
||||
- Cache Docker builds more by copying and installing Python dependencies before copying CTFd
|
||||
- Change the default emails slightly and rework confirmation email page to make some recommendations clearer
|
||||
- Use `examplectf.com` as testing/development domain instead of `ctfd.io`
|
||||
|
||||
@@ -174,25 +174,25 @@ class ServerConfig(object):
|
||||
AWS_S3_ENDPOINT_URL: str = empty_str_cast(config_ini["uploads"]["AWS_S3_ENDPOINT_URL"])
|
||||
|
||||
# === OPTIONAL ===
|
||||
REVERSE_PROXY: bool = empty_str_cast(config_ini["optional"]["REVERSE_PROXY"], default=False)
|
||||
REVERSE_PROXY: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["REVERSE_PROXY"], default=False))
|
||||
|
||||
TEMPLATES_AUTO_RELOAD: bool = empty_str_cast(config_ini["optional"]["TEMPLATES_AUTO_RELOAD"], default=True)
|
||||
TEMPLATES_AUTO_RELOAD: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["TEMPLATES_AUTO_RELOAD"], default=True))
|
||||
|
||||
THEME_FALLBACK: bool = empty_str_cast(config_ini["optional"]["THEME_FALLBACK"], default=False)
|
||||
THEME_FALLBACK: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["THEME_FALLBACK"], default=False))
|
||||
|
||||
SQLALCHEMY_TRACK_MODIFICATIONS: bool = empty_str_cast(config_ini["optional"]["SQLALCHEMY_TRACK_MODIFICATIONS"], default=False)
|
||||
SQLALCHEMY_TRACK_MODIFICATIONS: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["SQLALCHEMY_TRACK_MODIFICATIONS"], default=False))
|
||||
|
||||
SWAGGER_UI: bool = empty_str_cast(config_ini["optional"]["SWAGGER_UI"], default=False)
|
||||
SWAGGER_UI: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["SWAGGER_UI"], default=False))
|
||||
|
||||
SWAGGER_UI_ENDPOINT: str = "/" if SWAGGER_UI else None
|
||||
|
||||
UPDATE_CHECK: bool = empty_str_cast(config_ini["optional"]["UPDATE_CHECK"], default=True)
|
||||
UPDATE_CHECK: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["UPDATE_CHECK"], default=True))
|
||||
|
||||
APPLICATION_ROOT: str = empty_str_cast(config_ini["optional"]["APPLICATION_ROOT"], default="/")
|
||||
|
||||
SERVER_SENT_EVENTS: bool = empty_str_cast(config_ini["optional"]["SERVER_SENT_EVENTS"], default=True)
|
||||
SERVER_SENT_EVENTS: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["SERVER_SENT_EVENTS"], default=True))
|
||||
|
||||
HTML_SANITIZATION: bool = empty_str_cast(config_ini["optional"]["HTML_SANITIZATION"], default=False)
|
||||
HTML_SANITIZATION: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["HTML_SANITIZATION"], default=False))
|
||||
|
||||
if DATABASE_URL.startswith("sqlite") is False:
|
||||
SQLALCHEMY_ENGINE_OPTIONS = {
|
||||
|
||||
@@ -1,56 +1,97 @@
|
||||
# Bandit complains about security issues with lxml.
|
||||
# These issues have been addressed in the past and do not apply to parsing HTML.
|
||||
from lxml.html import html5parser, tostring # nosec B410
|
||||
from lxml.html.clean import Cleaner # nosec B410
|
||||
from lxml.html.defs import safe_attrs # nosec B410
|
||||
from pybluemonday import UGCPolicy
|
||||
|
||||
cleaner = Cleaner(
|
||||
comments=False,
|
||||
page_structure=False,
|
||||
embedded=False,
|
||||
frames=False,
|
||||
forms=False,
|
||||
links=False,
|
||||
meta=False,
|
||||
style=False,
|
||||
safe_attrs=(
|
||||
safe_attrs
|
||||
| {
|
||||
"style",
|
||||
# Allow data attributes from bootstrap elements
|
||||
"data-toggle",
|
||||
"data-target",
|
||||
"data-dismiss",
|
||||
"data-spy",
|
||||
"data-offset",
|
||||
"data-html",
|
||||
"data-placement",
|
||||
"data-parent",
|
||||
"data-title",
|
||||
"data-template",
|
||||
"data-interval",
|
||||
"data-keyboard",
|
||||
"data-pause",
|
||||
"data-ride",
|
||||
"data-wrap",
|
||||
"data-touch",
|
||||
"data-flip",
|
||||
"data-boundary",
|
||||
"data-reference",
|
||||
"data-display",
|
||||
"data-animation",
|
||||
"data-container",
|
||||
"data-delay",
|
||||
"data-selector",
|
||||
"data-content",
|
||||
"data-trigger",
|
||||
}
|
||||
),
|
||||
annoying_tags=False,
|
||||
# Copied from lxml:
|
||||
# https://github.com/lxml/lxml/blob/e986a9cb5d54827c59aefa8803bc90954d67221e/src/lxml/html/defs.py#L38
|
||||
# fmt: off
|
||||
SAFE_ATTRS = (
|
||||
'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align',
|
||||
'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff',
|
||||
'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan',
|
||||
'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype',
|
||||
'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id',
|
||||
'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method',
|
||||
'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly',
|
||||
'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape',
|
||||
'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
|
||||
'type', 'usemap', 'valign', 'value', 'vspace', 'width'
|
||||
)
|
||||
# fmt: on
|
||||
|
||||
PAGE_STRUCTURE_TAGS = {
|
||||
"title": [],
|
||||
}
|
||||
|
||||
META_TAGS = {
|
||||
"meta": ["name", "content", "property"],
|
||||
}
|
||||
|
||||
FORM_TAGS = {
|
||||
"form": ["method", "action"],
|
||||
"button": ["name", "type", "value", "disabled"],
|
||||
"input": ["name", "type", "value", "placeholder"],
|
||||
"select": ["name", "value", "placeholder"],
|
||||
"option": ["value"],
|
||||
"textarea": ["name", "value", "placeholder"],
|
||||
"label": ["for"],
|
||||
}
|
||||
|
||||
ANNOYING_TAGS = {
|
||||
"blink": [],
|
||||
"marquee": [],
|
||||
}
|
||||
|
||||
|
||||
MEDIA_TAGS = {
|
||||
"audio": ["autoplay", "controls", "crossorigin", "loop", "muted", "preload", "src"],
|
||||
"video": [
|
||||
"autoplay",
|
||||
"buffered",
|
||||
"controls",
|
||||
"crossorigin",
|
||||
"loop",
|
||||
"muted",
|
||||
"playsinline",
|
||||
"poster",
|
||||
"preload",
|
||||
"src",
|
||||
],
|
||||
"source": ["src", "type"],
|
||||
"iframe": ["width", "height", "src", "frameborder", "allow", "allowfullscreen"],
|
||||
}
|
||||
|
||||
SANITIZER = UGCPolicy()
|
||||
|
||||
for TAGS in (PAGE_STRUCTURE_TAGS, META_TAGS, FORM_TAGS, ANNOYING_TAGS, MEDIA_TAGS):
|
||||
for element in TAGS:
|
||||
SANITIZER.AllowElements(element)
|
||||
SANITIZER.AllowAttrs(*TAGS[element]).OnElements(element)
|
||||
|
||||
# Allow safe attrs copied from lxml
|
||||
SANITIZER.AllowAttrs(*SAFE_ATTRS).Globally()
|
||||
|
||||
# Allow styling globally
|
||||
SANITIZER.AllowAttrs("class", "style").Globally()
|
||||
|
||||
# Allow styling via bluemonday
|
||||
SANITIZER.AllowStyling()
|
||||
|
||||
# Allow safe convenience functions from bluemonday
|
||||
SANITIZER.AllowStandardAttributes()
|
||||
SANITIZER.AllowStandardURLs()
|
||||
|
||||
# Allow data atributes
|
||||
SANITIZER.AllowDataAttributes()
|
||||
|
||||
# Allow data URI images
|
||||
SANITIZER.AllowDataURIImages()
|
||||
|
||||
# Link security
|
||||
SANITIZER.AllowRelativeURLs(True)
|
||||
SANITIZER.RequireNoFollowOnFullyQualifiedLinks(True)
|
||||
SANITIZER.RequireNoFollowOnLinks(True)
|
||||
SANITIZER.RequireNoReferrerOnFullyQualifiedLinks(True)
|
||||
SANITIZER.RequireNoReferrerOnLinks(True)
|
||||
|
||||
|
||||
def sanitize_html(html):
|
||||
html = html5parser.fragment_fromstring(html, create_parent="div")
|
||||
html = cleaner.clean_html(tostring(html)).decode()
|
||||
return html
|
||||
return SANITIZER.sanitize(html)
|
||||
|
||||
@@ -24,9 +24,8 @@ marshmallow-sqlalchemy==0.17.0
|
||||
boto3==1.13.9
|
||||
marshmallow==2.20.2
|
||||
pydantic==1.5.1
|
||||
lxml==4.6.2
|
||||
html5lib==1.0.1
|
||||
WTForms==2.3.1
|
||||
python-geoacumen==0.0.1
|
||||
maxminddb==1.5.4
|
||||
tenacity==6.2.0
|
||||
pybluemonday==0.0.3
|
||||
|
||||
@@ -11,7 +11,7 @@ bcrypt==3.1.7 # via -r requirements.in
|
||||
boto3==1.13.9 # via -r requirements.in
|
||||
botocore==1.16.26 # via boto3, s3transfer
|
||||
certifi==2020.11.8 # via requests
|
||||
cffi==1.14.3 # via bcrypt, cmarkgfm
|
||||
cffi==1.14.3 # via bcrypt, cmarkgfm, pybluemonday
|
||||
chardet==3.0.4 # via requests
|
||||
click==7.1.2 # via flask
|
||||
cmarkgfm==0.4.2 # via -r requirements.in
|
||||
@@ -27,20 +27,19 @@ flask==1.1.2 # via -r requirements.in, flask-caching, flask-marshma
|
||||
gevent==20.9.0 # via -r requirements.in
|
||||
greenlet==0.4.17 # via gevent
|
||||
gunicorn==20.0.4 # via -r requirements.in
|
||||
html5lib==1.0.1 # via -r requirements.in
|
||||
idna==2.10 # via requests
|
||||
importlib-metadata==2.0.0 # via jsonschema
|
||||
itsdangerous==1.1.0 # via -r requirements.in, flask
|
||||
jinja2==2.11.2 # via -r requirements.in, flask
|
||||
jmespath==0.10.0 # via boto3, botocore
|
||||
jsonschema==3.2.0 # via flask-restx
|
||||
lxml==4.6.2 # via -r requirements.in
|
||||
mako==1.1.3 # via alembic
|
||||
markupsafe==1.1.1 # via jinja2, mako, wtforms
|
||||
marshmallow-sqlalchemy==0.17.0 # via -r requirements.in
|
||||
marshmallow==2.20.2 # via -r requirements.in, flask-marshmallow, marshmallow-sqlalchemy
|
||||
maxminddb==1.5.4 # via -r requirements.in, python-geoacumen
|
||||
passlib==1.7.2 # via -r requirements.in
|
||||
pybluemonday==0.0.3 # via -r requirements.in
|
||||
pycparser==2.20 # via cffi
|
||||
pydantic==1.5.1 # via -r requirements.in
|
||||
pymysql==0.9.3 # via -r requirements.in
|
||||
@@ -53,12 +52,11 @@ pytz==2020.4 # via flask-restx
|
||||
redis==3.5.2 # via -r requirements.in
|
||||
requests==2.23.0 # via -r requirements.in
|
||||
s3transfer==0.3.3 # via boto3
|
||||
six==1.15.0 # via bcrypt, flask-marshmallow, flask-restx, html5lib, jsonschema, python-dateutil, sqlalchemy-utils, tenacity
|
||||
six==1.15.0 # via bcrypt, flask-marshmallow, flask-restx, jsonschema, python-dateutil, sqlalchemy-utils, tenacity
|
||||
sqlalchemy-utils==0.36.6 # via -r requirements.in
|
||||
sqlalchemy==1.3.17 # via -r requirements.in, alembic, dataset, flask-sqlalchemy, marshmallow-sqlalchemy, sqlalchemy-utils
|
||||
tenacity==6.2.0 # via -r requirements.in
|
||||
urllib3==1.25.11 # via botocore, requests
|
||||
webencodings==0.5.1 # via html5lib
|
||||
werkzeug==1.0.1 # via -r requirements.in, flask, flask-restx
|
||||
wtforms==2.3.1 # via -r requirements.in
|
||||
zipp==3.4.0 # via importlib-metadata
|
||||
|
||||
Reference in New Issue
Block a user