Sanitize incoming html

2020-12-16 16:47:05 -08:00
parent d79a756813
commit a3c7d324d6
8 changed files with 62 additions and 11 deletions
--- a/bookwyrm/tests/test_sanitize_html.py
+++ b/bookwyrm/tests/test_sanitize_html.py
@@ -1,34 +1,36 @@
+''' make sure only valid html gets to the app '''
 from django.test import TestCase

 from bookwyrm.sanitize_html import InputHtmlParser

-
 class Sanitizer(TestCase):
+    ''' sanitizer tests '''
    def test_no_html(self):
+        ''' text without any html passes through unchanged '''
        input_text = 'no      html  '
        parser = InputHtmlParser()
        parser.feed(input_text)
        output = parser.get_output()
        self.assertEqual(input_text, output)

-
    def test_valid_html(self):
+        ''' leave the html untouched '''
        input_text = '<b>yes    </b> <i>html</i>'
        parser = InputHtmlParser()
        parser.feed(input_text)
        output = parser.get_output()
        self.assertEqual(input_text, output)

-
    def test_valid_html_attrs(self):
+        ''' leave attributes on valid html untouched '''
        input_text = '<a href="fish.com">yes    </a> <i>html</i>'
        parser = InputHtmlParser()
        parser.feed(input_text)
        output = parser.get_output()
        self.assertEqual(input_text, output)

-
    def test_invalid_html(self):
+        ''' remove all html when the html is malformed '''
        input_text = '<b>yes  <i>html</i>'
        parser = InputHtmlParser()
        parser.feed(input_text)
@@ -41,8 +43,8 @@ class Sanitizer(TestCase):
        output = parser.get_output()
        self.assertEqual('yes html   ', output)

-
    def test_disallowed_html(self):
+        ''' remove disallowed html but keep allowed html '''
        input_text = '<div>  yes <i>html</i></div>'
        parser = InputHtmlParser()
        parser.feed(input_text)