Skip to content

Commit 7c79c90

Browse files
committed
Sanitize HTML after rendering markdown
1 parent e6018cd commit 7c79c90

File tree

4 files changed

+42
-13
lines changed

4 files changed

+42
-13
lines changed

base_requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,3 +125,7 @@ tablib
125125
# Timezone data (required by django-timezone-field on Python 3.9+)
126126
# https://github.com/python/tzdata
127127
tzdata
128+
129+
# HTML sanitizer
130+
# https://github.com/mozilla/bleach
131+
bleach

netbox/utilities/templatetags/builtins/filters.py

Lines changed: 6 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111

1212
from netbox.config import get_config
1313
from utilities.markdown import StrikethroughExtension
14-
from utilities.utils import foreground_color
14+
from utilities.utils import clean_html, foreground_color
1515

1616
register = template.Library()
1717

@@ -144,18 +144,6 @@ def render_markdown(value):
144144
145145
{{ md_source_text|markdown }}
146146
"""
147-
schemes = '|'.join(get_config().ALLOWED_URL_SCHEMES)
148-
149-
# Strip HTML tags
150-
value = strip_tags(value)
151-
152-
# Sanitize Markdown links
153-
pattern = fr'\[([^\]]+)\]\(\s*(?!({schemes})).*:(.+)\)'
154-
value = re.sub(pattern, '[\\1](\\3)', value, flags=re.IGNORECASE)
155-
156-
# Sanitize Markdown reference links
157-
pattern = fr'\[([^\]]+)\]:\s*(?!({schemes}))\w*:(.+)'
158-
value = re.sub(pattern, '[\\1]: \\3', value, flags=re.IGNORECASE)
159147

160148
# Render Markdown
161149
html = markdown(value, extensions=['def_list', 'fenced_code', 'tables', StrikethroughExtension()])
@@ -164,6 +152,11 @@ def render_markdown(value):
164152
if html:
165153
html = f'<div class="rendered-markdown">{html}</div>'
166154

155+
schemes = get_config().ALLOWED_URL_SCHEMES
156+
157+
# Sanitize HTML
158+
html = clean_html(html, schemes)
159+
167160
return mark_safe(html)
168161

169162

netbox/utilities/utils.py

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from decimal import Decimal
55
from itertools import count, groupby
66

7+
import bleach
78
from django.core.serializers import serialize
89
from django.db.models import Count, OuterRef, Subquery
910
from django.db.models.functions import Coalesce
@@ -385,3 +386,33 @@ def copy_safe_request(request):
385386
'path': request.path,
386387
'id': getattr(request, 'id', None), # UUID assigned by middleware
387388
})
389+
390+
391+
def clean_html(html, schemes):
    """
    Run the given HTML through bleach, keeping only whitelisted tags and attributes.

    The `schemes` argument is forwarded to bleach as the collection of URI
    schemes (protocols) that are allowed.
    """
    heading_tags = [f"h{level}" for level in range(1, 7)]

    # Tags permitted in the sanitized output
    permitted_tags = [
        "div", "pre", "code", "blockquote", "del",
        "hr", *heading_tags,
        "ul", "ol", "li", "p", "br",
        "strong", "em", "a", "b", "i", "img",
        "table", "thead", "tbody", "tr", "th", "td",
        "dl", "dt", "dd",
    ]

    # Attributes permitted per tag; every heading level may carry an "id"
    permitted_attributes = {"div": ['class']}
    permitted_attributes.update({tag: ["id"] for tag in heading_tags})
    permitted_attributes["a"] = ["href", "title"]
    permitted_attributes["img"] = ["src", "title", "alt"]

    return bleach.clean(html, tags=permitted_tags, attributes=permitted_attributes, protocols=schemes)

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
bleach==5.0.0
12
Django==4.0.4
23
django-cors-headers==3.12.0
34
django-debug-toolbar==3.2.4

0 commit comments

Comments
 (0)