diff --git a/CHANGELOG.md b/CHANGELOG.md index df026db1..2142e8f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,12 +46,16 @@ **Deployment** +- Fix boolean configs from the `config.ini` optional section - Install `python3-dev` instead of `python-dev` in apt -- Bump lxml to 4.6.2 +- Require `pybluemonday` as pip dependency +- Remove `lxml` and `html5lib` from pip dependencies - Bump pip-compile to 5.4.0 **Miscellaneous** +- Rewrite the HTML santiziation feature (controlled by `HTML_SANITIZATION`) to use the pybluemonday library instead of lxml/html5lib + - Note that this feature is still in beta - Cache Docker builds more by copying and installing Python dependencies before copying CTFd - Change the default emails slightly and rework confirmation email page to make some recommendations clearer - Use `examplectf.com` as testing/development domain instead of `ctfd.io` diff --git a/CTFd/config.py b/CTFd/config.py index a06b9da0..fd71343a 100644 --- a/CTFd/config.py +++ b/CTFd/config.py @@ -174,25 +174,25 @@ class ServerConfig(object): AWS_S3_ENDPOINT_URL: str = empty_str_cast(config_ini["uploads"]["AWS_S3_ENDPOINT_URL"]) # === OPTIONAL === - REVERSE_PROXY: bool = empty_str_cast(config_ini["optional"]["REVERSE_PROXY"], default=False) + REVERSE_PROXY: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["REVERSE_PROXY"], default=False)) - TEMPLATES_AUTO_RELOAD: bool = empty_str_cast(config_ini["optional"]["TEMPLATES_AUTO_RELOAD"], default=True) + TEMPLATES_AUTO_RELOAD: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["TEMPLATES_AUTO_RELOAD"], default=True)) - THEME_FALLBACK: bool = empty_str_cast(config_ini["optional"]["THEME_FALLBACK"], default=False) + THEME_FALLBACK: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["THEME_FALLBACK"], default=False)) - SQLALCHEMY_TRACK_MODIFICATIONS: bool = empty_str_cast(config_ini["optional"]["SQLALCHEMY_TRACK_MODIFICATIONS"], default=False) + SQLALCHEMY_TRACK_MODIFICATIONS: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["SQLALCHEMY_TRACK_MODIFICATIONS"], default=False)) - SWAGGER_UI: bool = empty_str_cast(config_ini["optional"]["SWAGGER_UI"], default=False) + SWAGGER_UI: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["SWAGGER_UI"], default=False)) SWAGGER_UI_ENDPOINT: str = "/" if SWAGGER_UI else None - UPDATE_CHECK: bool = empty_str_cast(config_ini["optional"]["UPDATE_CHECK"], default=True) + UPDATE_CHECK: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["UPDATE_CHECK"], default=True)) APPLICATION_ROOT: str = empty_str_cast(config_ini["optional"]["APPLICATION_ROOT"], default="/") - SERVER_SENT_EVENTS: bool = empty_str_cast(config_ini["optional"]["SERVER_SENT_EVENTS"], default=True) + SERVER_SENT_EVENTS: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["SERVER_SENT_EVENTS"], default=True)) - HTML_SANITIZATION: bool = empty_str_cast(config_ini["optional"]["HTML_SANITIZATION"], default=False) + HTML_SANITIZATION: bool = process_boolean_str(empty_str_cast(config_ini["optional"]["HTML_SANITIZATION"], default=False)) if DATABASE_URL.startswith("sqlite") is False: SQLALCHEMY_ENGINE_OPTIONS = { diff --git a/CTFd/utils/security/sanitize.py b/CTFd/utils/security/sanitize.py index 86bed39a..0cef2532 100644 --- a/CTFd/utils/security/sanitize.py +++ b/CTFd/utils/security/sanitize.py @@ -1,56 +1,97 @@ -# Bandit complains about security issues with lxml. -# These issues have been addressed in the past and do not apply to parsing HTML. -from lxml.html import html5parser, tostring # nosec B410 -from lxml.html.clean import Cleaner # nosec B410 -from lxml.html.defs import safe_attrs # nosec B410 +from pybluemonday import UGCPolicy -cleaner = Cleaner( - comments=False, - page_structure=False, - embedded=False, - frames=False, - forms=False, - links=False, - meta=False, - style=False, - safe_attrs=( - safe_attrs - | { - "style", - # Allow data attributes from bootstrap elements - "data-toggle", - "data-target", - "data-dismiss", - "data-spy", - "data-offset", - "data-html", - "data-placement", - "data-parent", - "data-title", - "data-template", - "data-interval", - "data-keyboard", - "data-pause", - "data-ride", - "data-wrap", - "data-touch", - "data-flip", - "data-boundary", - "data-reference", - "data-display", - "data-animation", - "data-container", - "data-delay", - "data-selector", - "data-content", - "data-trigger", - } - ), - annoying_tags=False, +# Copied from lxml: +# https://github.com/lxml/lxml/blob/e986a9cb5d54827c59aefa8803bc90954d67221e/src/lxml/html/defs.py#L38 +# fmt: off +SAFE_ATTRS = ( + 'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', + 'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff', + 'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan', + 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype', + 'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id', + 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method', + 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', + 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', + 'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', + 'type', 'usemap', 'valign', 'value', 'vspace', 'width' ) +# fmt: on + +PAGE_STRUCTURE_TAGS = { + "title": [], +} + +META_TAGS = { + "meta": ["name", "content", "property"], +} + +FORM_TAGS = { + "form": ["method", "action"], + "button": ["name", "type", "value", "disabled"], + "input": ["name", "type", "value", "placeholder"], + "select": ["name", "value", "placeholder"], + "option": ["value"], + "textarea": ["name", "value", "placeholder"], + "label": ["for"], +} + +ANNOYING_TAGS = { + "blink": [], + "marquee": [], +} + + +MEDIA_TAGS = { + "audio": ["autoplay", "controls", "crossorigin", "loop", "muted", "preload", "src"], + "video": [ + "autoplay", + "buffered", + "controls", + "crossorigin", + "loop", + "muted", + "playsinline", + "poster", + "preload", + "src", + ], + "source": ["src", "type"], + "iframe": ["width", "height", "src", "frameborder", "allow", "allowfullscreen"], +} + +SANITIZER = UGCPolicy() + +for TAGS in (PAGE_STRUCTURE_TAGS, META_TAGS, FORM_TAGS, ANNOYING_TAGS, MEDIA_TAGS): + for element in TAGS: + SANITIZER.AllowElements(element) + SANITIZER.AllowAttrs(*TAGS[element]).OnElements(element) + +# Allow safe attrs copied from lxml +SANITIZER.AllowAttrs(*SAFE_ATTRS).Globally() + +# Allow styling globally +SANITIZER.AllowAttrs("class", "style").Globally() + +# Allow styling via bluemonday +SANITIZER.AllowStyling() + +# Allow safe convenience functions from bluemonday +SANITIZER.AllowStandardAttributes() +SANITIZER.AllowStandardURLs() + +# Allow data atributes +SANITIZER.AllowDataAttributes() + +# Allow data URI images +SANITIZER.AllowDataURIImages() + +# Link security +SANITIZER.AllowRelativeURLs(True) +SANITIZER.RequireNoFollowOnFullyQualifiedLinks(True) +SANITIZER.RequireNoFollowOnLinks(True) +SANITIZER.RequireNoReferrerOnFullyQualifiedLinks(True) +SANITIZER.RequireNoReferrerOnLinks(True) def sanitize_html(html): - html = html5parser.fragment_fromstring(html, create_parent="div") - html = cleaner.clean_html(tostring(html)).decode() - return html + return SANITIZER.sanitize(html) diff --git a/requirements.in b/requirements.in index 7a1f0e5d..c556e5ba 100644 --- a/requirements.in +++ b/requirements.in @@ -24,9 +24,8 @@ marshmallow-sqlalchemy==0.17.0 boto3==1.13.9 marshmallow==2.20.2 pydantic==1.5.1 -lxml==4.6.2 -html5lib==1.0.1 WTForms==2.3.1 python-geoacumen==0.0.1 maxminddb==1.5.4 tenacity==6.2.0 +pybluemonday==0.0.3 diff --git a/requirements.txt b/requirements.txt index 09fb4bcd..932bb119 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,7 @@ bcrypt==3.1.7 # via -r requirements.in boto3==1.13.9 # via -r requirements.in botocore==1.16.26 # via boto3, s3transfer certifi==2020.11.8 # via requests -cffi==1.14.3 # via bcrypt, cmarkgfm +cffi==1.14.3 # via bcrypt, cmarkgfm, pybluemonday chardet==3.0.4 # via requests click==7.1.2 # via flask cmarkgfm==0.4.2 # via -r requirements.in @@ -27,20 +27,19 @@ flask==1.1.2 # via -r requirements.in, flask-caching, flask-marshma gevent==20.9.0 # via -r requirements.in greenlet==0.4.17 # via gevent gunicorn==20.0.4 # via -r requirements.in -html5lib==1.0.1 # via -r requirements.in idna==2.10 # via requests importlib-metadata==2.0.0 # via jsonschema itsdangerous==1.1.0 # via -r requirements.in, flask jinja2==2.11.2 # via -r requirements.in, flask jmespath==0.10.0 # via boto3, botocore jsonschema==3.2.0 # via flask-restx -lxml==4.6.2 # via -r requirements.in mako==1.1.3 # via alembic markupsafe==1.1.1 # via jinja2, mako, wtforms marshmallow-sqlalchemy==0.17.0 # via -r requirements.in marshmallow==2.20.2 # via -r requirements.in, flask-marshmallow, marshmallow-sqlalchemy maxminddb==1.5.4 # via -r requirements.in, python-geoacumen passlib==1.7.2 # via -r requirements.in +pybluemonday==0.0.3 # via -r requirements.in pycparser==2.20 # via cffi pydantic==1.5.1 # via -r requirements.in pymysql==0.9.3 # via -r requirements.in @@ -53,12 +52,11 @@ pytz==2020.4 # via flask-restx redis==3.5.2 # via -r requirements.in requests==2.23.0 # via -r requirements.in s3transfer==0.3.3 # via boto3 -six==1.15.0 # via bcrypt, flask-marshmallow, flask-restx, html5lib, jsonschema, python-dateutil, sqlalchemy-utils, tenacity +six==1.15.0 # via bcrypt, flask-marshmallow, flask-restx, jsonschema, python-dateutil, sqlalchemy-utils, tenacity sqlalchemy-utils==0.36.6 # via -r requirements.in sqlalchemy==1.3.17 # via -r requirements.in, alembic, dataset, flask-sqlalchemy, marshmallow-sqlalchemy, sqlalchemy-utils tenacity==6.2.0 # via -r requirements.in urllib3==1.25.11 # via botocore, requests -webencodings==0.5.1 # via html5lib werkzeug==1.0.1 # via -r requirements.in, flask, flask-restx wtforms==2.3.1 # via -r requirements.in zipp==3.4.0 # via importlib-metadata