mirror of
https://github.com/aljazceru/CTFd.git
synced 2025-12-17 14:04:20 +01:00
Use pybluemonday instead of lxml for html sanitization (#1837)
* Use `pybluemonday` instead of `lxml` for html sanitization * Fix boolean optional configs in `config.py` * Closes #1835
This commit is contained in:
@@ -46,12 +46,16 @@
|
|||||||
|
|
||||||
**Deployment**
|
**Deployment**
|
||||||
|
|
||||||
|
- Fix boolean configs from the `config.ini` optional section
|
||||||
- Install `python3-dev` instead of `python-dev` in apt
|
- Install `python3-dev` instead of `python-dev` in apt
|
||||||
- Bump lxml to 4.6.2
|
- Require `pybluemonday` as pip dependency
|
||||||
|
- Remove `lxml` and `html5lib` from pip dependencies
|
||||||
- Bump pip-compile to 5.4.0
|
- Bump pip-compile to 5.4.0
|
||||||
|
|
||||||
**Miscellaneous**
|
**Miscellaneous**
|
||||||
|
|
||||||
|
- Rewrite the HTML santiziation feature (controlled by `HTML_SANITIZATION`) to use the pybluemonday library instead of lxml/html5lib
|
||||||
|
- Note that this feature is still in beta
|
||||||
- Cache Docker builds more by copying and installing Python dependencies before copying CTFd
|
- Cache Docker builds more by copying and installing Python dependencies before copying CTFd
|
||||||
- Change the default emails slightly and rework confirmation email page to make some recommendations clearer
|
- Change the default emails slightly and rework confirmation email page to make some recommendations clearer
|
||||||
- Use `examplectf.com` as testing/development domain instead of `ctfd.io`
|
- Use `examplectf.com` as testing/development domain instead of `ctfd.io`
|
||||||
|
|||||||
@@ -174,25 +174,25 @@ class ServerConfig(object):
|
|||||||
AWS_S3_ENDPOINT_URL: str = empty_str_cast(config_ini["uploads"]["AWS_S3_ENDPOINT_URL"])
|
AWS_S3_ENDPOINT_URL: str = empty_str_cast(config_ini["uploads"]["AWS_S3_ENDPOINT_URL"])
|
||||||
|
|
||||||
# === OPTIONAL ===
|
# === OPTIONAL ===
|
||||||
REVERSE_PROXY: bool = empty_str_cast(config_ini["optional"]["REVERSE_PROXY"], default=False)
|
REVERSE_PROXY: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["REVERSE_PROXY"], default=False))
|
||||||
|
|
||||||
TEMPLATES_AUTO_RELOAD: bool = empty_str_cast(config_ini["optional"]["TEMPLATES_AUTO_RELOAD"], default=True)
|
TEMPLATES_AUTO_RELOAD: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["TEMPLATES_AUTO_RELOAD"], default=True))
|
||||||
|
|
||||||
THEME_FALLBACK: bool = empty_str_cast(config_ini["optional"]["THEME_FALLBACK"], default=False)
|
THEME_FALLBACK: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["THEME_FALLBACK"], default=False))
|
||||||
|
|
||||||
SQLALCHEMY_TRACK_MODIFICATIONS: bool = empty_str_cast(config_ini["optional"]["SQLALCHEMY_TRACK_MODIFICATIONS"], default=False)
|
SQLALCHEMY_TRACK_MODIFICATIONS: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["SQLALCHEMY_TRACK_MODIFICATIONS"], default=False))
|
||||||
|
|
||||||
SWAGGER_UI: bool = empty_str_cast(config_ini["optional"]["SWAGGER_UI"], default=False)
|
SWAGGER_UI: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["SWAGGER_UI"], default=False))
|
||||||
|
|
||||||
SWAGGER_UI_ENDPOINT: str = "/" if SWAGGER_UI else None
|
SWAGGER_UI_ENDPOINT: str = "/" if SWAGGER_UI else None
|
||||||
|
|
||||||
UPDATE_CHECK: bool = empty_str_cast(config_ini["optional"]["UPDATE_CHECK"], default=True)
|
UPDATE_CHECK: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["UPDATE_CHECK"], default=True))
|
||||||
|
|
||||||
APPLICATION_ROOT: str = empty_str_cast(config_ini["optional"]["APPLICATION_ROOT"], default="/")
|
APPLICATION_ROOT: str = empty_str_cast(config_ini["optional"]["APPLICATION_ROOT"], default="/")
|
||||||
|
|
||||||
SERVER_SENT_EVENTS: bool = empty_str_cast(config_ini["optional"]["SERVER_SENT_EVENTS"], default=True)
|
SERVER_SENT_EVENTS: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["SERVER_SENT_EVENTS"], default=True))
|
||||||
|
|
||||||
HTML_SANITIZATION: bool = empty_str_cast(config_ini["optional"]["HTML_SANITIZATION"], default=False)
|
HTML_SANITIZATION: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["HTML_SANITIZATION"], default=False))
|
||||||
|
|
||||||
if DATABASE_URL.startswith("sqlite") is False:
|
if DATABASE_URL.startswith("sqlite") is False:
|
||||||
SQLALCHEMY_ENGINE_OPTIONS = {
|
SQLALCHEMY_ENGINE_OPTIONS = {
|
||||||
|
|||||||
@@ -1,56 +1,97 @@
|
|||||||
# Bandit complains about security issues with lxml.
|
from pybluemonday import UGCPolicy
|
||||||
# These issues have been addressed in the past and do not apply to parsing HTML.
|
|
||||||
from lxml.html import html5parser, tostring # nosec B410
|
|
||||||
from lxml.html.clean import Cleaner # nosec B410
|
|
||||||
from lxml.html.defs import safe_attrs # nosec B410
|
|
||||||
|
|
||||||
cleaner = Cleaner(
|
# Copied from lxml:
|
||||||
comments=False,
|
# https://github.com/lxml/lxml/blob/e986a9cb5d54827c59aefa8803bc90954d67221e/src/lxml/html/defs.py#L38
|
||||||
page_structure=False,
|
# fmt: off
|
||||||
embedded=False,
|
SAFE_ATTRS = (
|
||||||
frames=False,
|
'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align',
|
||||||
forms=False,
|
'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff',
|
||||||
links=False,
|
'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan',
|
||||||
meta=False,
|
'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype',
|
||||||
style=False,
|
'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id',
|
||||||
safe_attrs=(
|
'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method',
|
||||||
safe_attrs
|
'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly',
|
||||||
| {
|
'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape',
|
||||||
"style",
|
'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
|
||||||
# Allow data attributes from bootstrap elements
|
'type', 'usemap', 'valign', 'value', 'vspace', 'width'
|
||||||
"data-toggle",
|
|
||||||
"data-target",
|
|
||||||
"data-dismiss",
|
|
||||||
"data-spy",
|
|
||||||
"data-offset",
|
|
||||||
"data-html",
|
|
||||||
"data-placement",
|
|
||||||
"data-parent",
|
|
||||||
"data-title",
|
|
||||||
"data-template",
|
|
||||||
"data-interval",
|
|
||||||
"data-keyboard",
|
|
||||||
"data-pause",
|
|
||||||
"data-ride",
|
|
||||||
"data-wrap",
|
|
||||||
"data-touch",
|
|
||||||
"data-flip",
|
|
||||||
"data-boundary",
|
|
||||||
"data-reference",
|
|
||||||
"data-display",
|
|
||||||
"data-animation",
|
|
||||||
"data-container",
|
|
||||||
"data-delay",
|
|
||||||
"data-selector",
|
|
||||||
"data-content",
|
|
||||||
"data-trigger",
|
|
||||||
}
|
|
||||||
),
|
|
||||||
annoying_tags=False,
|
|
||||||
)
|
)
|
||||||
|
# fmt: on
|
||||||
|
|
||||||
|
PAGE_STRUCTURE_TAGS = {
|
||||||
|
"title": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
META_TAGS = {
|
||||||
|
"meta": ["name", "content", "property"],
|
||||||
|
}
|
||||||
|
|
||||||
|
FORM_TAGS = {
|
||||||
|
"form": ["method", "action"],
|
||||||
|
"button": ["name", "type", "value", "disabled"],
|
||||||
|
"input": ["name", "type", "value", "placeholder"],
|
||||||
|
"select": ["name", "value", "placeholder"],
|
||||||
|
"option": ["value"],
|
||||||
|
"textarea": ["name", "value", "placeholder"],
|
||||||
|
"label": ["for"],
|
||||||
|
}
|
||||||
|
|
||||||
|
ANNOYING_TAGS = {
|
||||||
|
"blink": [],
|
||||||
|
"marquee": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
MEDIA_TAGS = {
|
||||||
|
"audio": ["autoplay", "controls", "crossorigin", "loop", "muted", "preload", "src"],
|
||||||
|
"video": [
|
||||||
|
"autoplay",
|
||||||
|
"buffered",
|
||||||
|
"controls",
|
||||||
|
"crossorigin",
|
||||||
|
"loop",
|
||||||
|
"muted",
|
||||||
|
"playsinline",
|
||||||
|
"poster",
|
||||||
|
"preload",
|
||||||
|
"src",
|
||||||
|
],
|
||||||
|
"source": ["src", "type"],
|
||||||
|
"iframe": ["width", "height", "src", "frameborder", "allow", "allowfullscreen"],
|
||||||
|
}
|
||||||
|
|
||||||
|
SANITIZER = UGCPolicy()
|
||||||
|
|
||||||
|
for TAGS in (PAGE_STRUCTURE_TAGS, META_TAGS, FORM_TAGS, ANNOYING_TAGS, MEDIA_TAGS):
|
||||||
|
for element in TAGS:
|
||||||
|
SANITIZER.AllowElements(element)
|
||||||
|
SANITIZER.AllowAttrs(*TAGS[element]).OnElements(element)
|
||||||
|
|
||||||
|
# Allow safe attrs copied from lxml
|
||||||
|
SANITIZER.AllowAttrs(*SAFE_ATTRS).Globally()
|
||||||
|
|
||||||
|
# Allow styling globally
|
||||||
|
SANITIZER.AllowAttrs("class", "style").Globally()
|
||||||
|
|
||||||
|
# Allow styling via bluemonday
|
||||||
|
SANITIZER.AllowStyling()
|
||||||
|
|
||||||
|
# Allow safe convenience functions from bluemonday
|
||||||
|
SANITIZER.AllowStandardAttributes()
|
||||||
|
SANITIZER.AllowStandardURLs()
|
||||||
|
|
||||||
|
# Allow data atributes
|
||||||
|
SANITIZER.AllowDataAttributes()
|
||||||
|
|
||||||
|
# Allow data URI images
|
||||||
|
SANITIZER.AllowDataURIImages()
|
||||||
|
|
||||||
|
# Link security
|
||||||
|
SANITIZER.AllowRelativeURLs(True)
|
||||||
|
SANITIZER.RequireNoFollowOnFullyQualifiedLinks(True)
|
||||||
|
SANITIZER.RequireNoFollowOnLinks(True)
|
||||||
|
SANITIZER.RequireNoReferrerOnFullyQualifiedLinks(True)
|
||||||
|
SANITIZER.RequireNoReferrerOnLinks(True)
|
||||||
|
|
||||||
|
|
||||||
def sanitize_html(html):
|
def sanitize_html(html):
|
||||||
html = html5parser.fragment_fromstring(html, create_parent="div")
|
return SANITIZER.sanitize(html)
|
||||||
html = cleaner.clean_html(tostring(html)).decode()
|
|
||||||
return html
|
|
||||||
|
|||||||
@@ -24,9 +24,8 @@ marshmallow-sqlalchemy==0.17.0
|
|||||||
boto3==1.13.9
|
boto3==1.13.9
|
||||||
marshmallow==2.20.2
|
marshmallow==2.20.2
|
||||||
pydantic==1.5.1
|
pydantic==1.5.1
|
||||||
lxml==4.6.2
|
|
||||||
html5lib==1.0.1
|
|
||||||
WTForms==2.3.1
|
WTForms==2.3.1
|
||||||
python-geoacumen==0.0.1
|
python-geoacumen==0.0.1
|
||||||
maxminddb==1.5.4
|
maxminddb==1.5.4
|
||||||
tenacity==6.2.0
|
tenacity==6.2.0
|
||||||
|
pybluemonday==0.0.3
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ bcrypt==3.1.7 # via -r requirements.in
|
|||||||
boto3==1.13.9 # via -r requirements.in
|
boto3==1.13.9 # via -r requirements.in
|
||||||
botocore==1.16.26 # via boto3, s3transfer
|
botocore==1.16.26 # via boto3, s3transfer
|
||||||
certifi==2020.11.8 # via requests
|
certifi==2020.11.8 # via requests
|
||||||
cffi==1.14.3 # via bcrypt, cmarkgfm
|
cffi==1.14.3 # via bcrypt, cmarkgfm, pybluemonday
|
||||||
chardet==3.0.4 # via requests
|
chardet==3.0.4 # via requests
|
||||||
click==7.1.2 # via flask
|
click==7.1.2 # via flask
|
||||||
cmarkgfm==0.4.2 # via -r requirements.in
|
cmarkgfm==0.4.2 # via -r requirements.in
|
||||||
@@ -27,20 +27,19 @@ flask==1.1.2 # via -r requirements.in, flask-caching, flask-marshma
|
|||||||
gevent==20.9.0 # via -r requirements.in
|
gevent==20.9.0 # via -r requirements.in
|
||||||
greenlet==0.4.17 # via gevent
|
greenlet==0.4.17 # via gevent
|
||||||
gunicorn==20.0.4 # via -r requirements.in
|
gunicorn==20.0.4 # via -r requirements.in
|
||||||
html5lib==1.0.1 # via -r requirements.in
|
|
||||||
idna==2.10 # via requests
|
idna==2.10 # via requests
|
||||||
importlib-metadata==2.0.0 # via jsonschema
|
importlib-metadata==2.0.0 # via jsonschema
|
||||||
itsdangerous==1.1.0 # via -r requirements.in, flask
|
itsdangerous==1.1.0 # via -r requirements.in, flask
|
||||||
jinja2==2.11.2 # via -r requirements.in, flask
|
jinja2==2.11.2 # via -r requirements.in, flask
|
||||||
jmespath==0.10.0 # via boto3, botocore
|
jmespath==0.10.0 # via boto3, botocore
|
||||||
jsonschema==3.2.0 # via flask-restx
|
jsonschema==3.2.0 # via flask-restx
|
||||||
lxml==4.6.2 # via -r requirements.in
|
|
||||||
mako==1.1.3 # via alembic
|
mako==1.1.3 # via alembic
|
||||||
markupsafe==1.1.1 # via jinja2, mako, wtforms
|
markupsafe==1.1.1 # via jinja2, mako, wtforms
|
||||||
marshmallow-sqlalchemy==0.17.0 # via -r requirements.in
|
marshmallow-sqlalchemy==0.17.0 # via -r requirements.in
|
||||||
marshmallow==2.20.2 # via -r requirements.in, flask-marshmallow, marshmallow-sqlalchemy
|
marshmallow==2.20.2 # via -r requirements.in, flask-marshmallow, marshmallow-sqlalchemy
|
||||||
maxminddb==1.5.4 # via -r requirements.in, python-geoacumen
|
maxminddb==1.5.4 # via -r requirements.in, python-geoacumen
|
||||||
passlib==1.7.2 # via -r requirements.in
|
passlib==1.7.2 # via -r requirements.in
|
||||||
|
pybluemonday==0.0.3 # via -r requirements.in
|
||||||
pycparser==2.20 # via cffi
|
pycparser==2.20 # via cffi
|
||||||
pydantic==1.5.1 # via -r requirements.in
|
pydantic==1.5.1 # via -r requirements.in
|
||||||
pymysql==0.9.3 # via -r requirements.in
|
pymysql==0.9.3 # via -r requirements.in
|
||||||
@@ -53,12 +52,11 @@ pytz==2020.4 # via flask-restx
|
|||||||
redis==3.5.2 # via -r requirements.in
|
redis==3.5.2 # via -r requirements.in
|
||||||
requests==2.23.0 # via -r requirements.in
|
requests==2.23.0 # via -r requirements.in
|
||||||
s3transfer==0.3.3 # via boto3
|
s3transfer==0.3.3 # via boto3
|
||||||
six==1.15.0 # via bcrypt, flask-marshmallow, flask-restx, html5lib, jsonschema, python-dateutil, sqlalchemy-utils, tenacity
|
six==1.15.0 # via bcrypt, flask-marshmallow, flask-restx, jsonschema, python-dateutil, sqlalchemy-utils, tenacity
|
||||||
sqlalchemy-utils==0.36.6 # via -r requirements.in
|
sqlalchemy-utils==0.36.6 # via -r requirements.in
|
||||||
sqlalchemy==1.3.17 # via -r requirements.in, alembic, dataset, flask-sqlalchemy, marshmallow-sqlalchemy, sqlalchemy-utils
|
sqlalchemy==1.3.17 # via -r requirements.in, alembic, dataset, flask-sqlalchemy, marshmallow-sqlalchemy, sqlalchemy-utils
|
||||||
tenacity==6.2.0 # via -r requirements.in
|
tenacity==6.2.0 # via -r requirements.in
|
||||||
urllib3==1.25.11 # via botocore, requests
|
urllib3==1.25.11 # via botocore, requests
|
||||||
webencodings==0.5.1 # via html5lib
|
|
||||||
werkzeug==1.0.1 # via -r requirements.in, flask, flask-restx
|
werkzeug==1.0.1 # via -r requirements.in, flask, flask-restx
|
||||||
wtforms==2.3.1 # via -r requirements.in
|
wtforms==2.3.1 # via -r requirements.in
|
||||||
zipp==3.4.0 # via importlib-metadata
|
zipp==3.4.0 # via importlib-metadata
|
||||||
|
|||||||
Reference in New Issue
Block a user