Adds allowlist for html attrs

This commit is contained in:
Mouse Reeve
2022-02-03 13:15:06 -08:00
parent 3b48d986d5
commit 1f6ecc39ac
2 changed files with 23 additions and 2 deletions

View File

@ -22,6 +22,9 @@ class InputHtmlParser(HTMLParser): # pylint: disable=abstract-method
"ol",
"li",
]
self.allowed_attrs = [
"href", "rel", "src", "alt"
]
self.tag_stack = []
self.output = []
# if the html appears invalid, we just won't allow any at all
@ -30,7 +33,14 @@ class InputHtmlParser(HTMLParser): # pylint: disable=abstract-method
def handle_starttag(self, tag, attrs):
    """Emit an allowed start tag, keeping only allowlisted attributes.

    The tag is rebuilt from scratch rather than echoed from the raw input,
    so attributes outside ``self.allowed_attrs`` can never reach the output.

    Args:
        tag: lower-cased tag name supplied by ``HTMLParser``.
        attrs: list of ``(name, value)`` pairs; ``value`` is ``None`` for
            attributes written without a value (e.g. ``<img alt>``).

    Side effects:
        Appends ``("tag", <rebuilt start tag>)`` to ``self.output`` and
        pushes ``tag`` onto ``self.tag_stack`` when the tag is allowed;
        otherwise appends an empty ``("data", "")`` entry.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    if not (self.allow_html and tag in self.allowed_tags):
        # Disallowed tag (or HTML disabled): emit nothing for it.
        self.output.append(("data", ""))
        return

    kept = []
    for name, value in attrs:
        if name not in self.allowed_attrs:
            continue
        if value is None:
            # Valueless attribute: keep it bare instead of rendering "None".
            kept.append(name)
        else:
            # HTMLParser passes values already entity-decoded; re-escape so
            # a quote inside the value cannot close the attribute and
            # smuggle in extra attributes.
            kept.append(f'{name}="{escape(value, quote=True)}"')

    reconstructed = " ".join([f"<{tag}", *kept]) + ">"
    self.output.append(("tag", reconstructed))
    self.tag_stack.append(tag)