diff --git a/bookwyrm/migrations/0025_auto_20201217_0046.py b/bookwyrm/migrations/0025_auto_20201217_0046.py
new file mode 100644
index 00000000..a3ffe8c1
--- /dev/null
+++ b/bookwyrm/migrations/0025_auto_20201217_0046.py
@@ -0,0 +1,39 @@
+# Generated by Django 3.0.7 on 2020-12-17 00:46
+
+import bookwyrm.models.fields
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('bookwyrm', '0024_merge_20201216_1721'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='author',
+ name='bio',
+ field=bookwyrm.models.fields.HtmlField(blank=True, null=True),
+ ),
+ migrations.AlterField(
+ model_name='book',
+ name='description',
+ field=bookwyrm.models.fields.HtmlField(blank=True, null=True),
+ ),
+ migrations.AlterField(
+ model_name='quotation',
+ name='quote',
+ field=bookwyrm.models.fields.HtmlField(),
+ ),
+ migrations.AlterField(
+ model_name='status',
+ name='content',
+ field=bookwyrm.models.fields.HtmlField(blank=True, null=True),
+ ),
+ migrations.AlterField(
+ model_name='user',
+ name='summary',
+ field=bookwyrm.models.fields.HtmlField(default=''),
+ ),
+ ]
diff --git a/bookwyrm/models/author.py b/bookwyrm/models/author.py
index 331d2dd6..47714d4e 100644
--- a/bookwyrm/models/author.py
+++ b/bookwyrm/models/author.py
@@ -25,7 +25,7 @@ class Author(ActivitypubMixin, BookWyrmModel):
aliases = fields.ArrayField(
models.CharField(max_length=255), blank=True, default=list
)
- bio = fields.TextField(null=True, blank=True)
+ bio = fields.HtmlField(null=True, blank=True)
def save(self, *args, **kwargs):
''' can't be abstract for query reasons, but you shouldn't USE it '''
diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py
index bcd4bc04..3080a115 100644
--- a/bookwyrm/models/book.py
+++ b/bookwyrm/models/book.py
@@ -36,7 +36,7 @@ class Book(ActivitypubMixin, BookWyrmModel):
title = fields.CharField(max_length=255)
sort_title = fields.CharField(max_length=255, blank=True, null=True)
subtitle = fields.CharField(max_length=255, blank=True, null=True)
- description = fields.TextField(blank=True, null=True)
+ description = fields.HtmlField(blank=True, null=True)
languages = fields.ArrayField(
models.CharField(max_length=255), blank=True, default=list
)
diff --git a/bookwyrm/models/fields.py b/bookwyrm/models/fields.py
index 5e12f5d5..52933715 100644
--- a/bookwyrm/models/fields.py
+++ b/bookwyrm/models/fields.py
@@ -12,6 +12,7 @@ from django.db import models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from bookwyrm import activitypub
+from bookwyrm.sanitize_html import InputHtmlParser
from bookwyrm.settings import DOMAIN
from bookwyrm.connectors import get_image
@@ -362,6 +363,15 @@ class DateTimeField(ActivitypubFieldMixin, models.DateTimeField):
except (ParserError, TypeError):
return None
+class HtmlField(ActivitypubFieldMixin, models.TextField):
+ ''' a text field for storing html '''
+ def field_from_activity(self, value):
+ if not value or value == MISSING:
+ return None
+ sanitizer = InputHtmlParser()
+ sanitizer.feed(value)
+ return sanitizer.get_output()
+
class ArrayField(ActivitypubFieldMixin, DjangoArrayField):
''' activitypub-aware array field '''
def field_to_activity(self, value):
diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py
index fcf4a290..66114e7c 100644
--- a/bookwyrm/models/status.py
+++ b/bookwyrm/models/status.py
@@ -14,7 +14,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel):
''' any post, like a reply to a review, etc '''
user = fields.ForeignKey(
'User', on_delete=models.PROTECT, activitypub_field='attributedTo')
- content = fields.TextField(blank=True, null=True)
+ content = fields.HtmlField(blank=True, null=True)
mention_users = fields.TagField('User', related_name='mention_user')
mention_books = fields.TagField('Edition', related_name='mention_book')
local = models.BooleanField(default=True)
@@ -134,7 +134,7 @@ class Comment(Status):
class Quotation(Status):
''' like a review but without a rating and transient '''
- quote = fields.TextField()
+ quote = fields.HtmlField()
book = fields.ForeignKey(
'Edition', on_delete=models.PROTECT, activitypub_field='inReplyToBook')
diff --git a/bookwyrm/models/user.py b/bookwyrm/models/user.py
index 63549d36..9d66eb5c 100644
--- a/bookwyrm/models/user.py
+++ b/bookwyrm/models/user.py
@@ -42,7 +42,7 @@ class User(OrderedCollectionPageMixin, AbstractUser):
blank=True,
)
outbox = fields.RemoteIdField(unique=True)
- summary = fields.TextField(default='')
+ summary = fields.HtmlField(default='')
local = models.BooleanField(default=False)
bookwyrm_user = fields.BooleanField(default=True)
localname = models.CharField(
diff --git a/bookwyrm/sanitize_html.py b/bookwyrm/sanitize_html.py
index 9c5ca73a..933fc43c 100644
--- a/bookwyrm/sanitize_html.py
+++ b/bookwyrm/sanitize_html.py
@@ -1,7 +1,7 @@
''' html parser to clean up incoming text from unknown sources '''
from html.parser import HTMLParser
-class InputHtmlParser(HTMLParser):
+class InputHtmlParser(HTMLParser):#pylint: disable=abstract-method
''' Removes any html that isn't allowed_tagsed from a block '''
def __init__(self):
diff --git a/bookwyrm/tests/test_sanitize_html.py b/bookwyrm/tests/test_sanitize_html.py
index 3344a934..58d94311 100644
--- a/bookwyrm/tests/test_sanitize_html.py
+++ b/bookwyrm/tests/test_sanitize_html.py
@@ -1,34 +1,36 @@
+''' make sure only valid html gets to the app '''
from django.test import TestCase
from bookwyrm.sanitize_html import InputHtmlParser
-
class Sanitizer(TestCase):
+ ''' sanitizer tests '''
def test_no_html(self):
+ ''' just text '''
input_text = 'no html '
parser = InputHtmlParser()
parser.feed(input_text)
output = parser.get_output()
self.assertEqual(input_text, output)
-
def test_valid_html(self):
+ ''' leave the html untouched '''
input_text = 'yes html'
parser = InputHtmlParser()
parser.feed(input_text)
output = parser.get_output()
self.assertEqual(input_text, output)
-
def test_valid_html_attrs(self):
+ ''' and don't remove attributes '''
input_text = 'yes html'
parser = InputHtmlParser()
parser.feed(input_text)
output = parser.get_output()
self.assertEqual(input_text, output)
-
def test_invalid_html(self):
+ ''' remove all html when the html is malformed '''
input_text = 'yes html'
parser = InputHtmlParser()
parser.feed(input_text)
@@ -41,8 +43,8 @@ class Sanitizer(TestCase):
output = parser.get_output()
self.assertEqual('yes html ', output)
-
def test_disallowed_html(self):
+ ''' remove disallowed html but keep allowed html '''
input_text = '
yes html
'
parser = InputHtmlParser()
parser.feed(input_text)