Merge branch 'main' into isni-poc

This commit is contained in:
Hugh Rundle
2021-11-22 08:53:58 +11:00
142 changed files with 14141 additions and 3230 deletions

View File

@ -20,7 +20,7 @@ from django.utils.http import http_date
from bookwyrm import activitypub
from bookwyrm.settings import USER_AGENT, PAGE_LENGTH
from bookwyrm.signatures import make_signature, make_digest
from bookwyrm.tasks import app
from bookwyrm.tasks import app, MEDIUM
from bookwyrm.models.fields import ImageField, ManyToManyField
logger = logging.getLogger(__name__)
@ -29,7 +29,6 @@ logger = logging.getLogger(__name__)
PropertyField = namedtuple("PropertyField", ("set_activity_from_field"))
# pylint: disable=invalid-name
def set_activity_from_property_field(activity, obj, field):
"""assign a model property value to the activity json"""
@ -126,12 +125,15 @@ class ActivitypubMixin:
# there OUGHT to be only one match
return match.first()
def broadcast(self, activity, sender, software=None):
def broadcast(self, activity, sender, software=None, queue=MEDIUM):
"""send out an activity"""
broadcast_task.delay(
sender.id,
json.dumps(activity, cls=activitypub.ActivityEncoder),
self.get_recipients(software=software),
broadcast_task.apply_async(
args=(
sender.id,
json.dumps(activity, cls=activitypub.ActivityEncoder),
self.get_recipients(software=software),
),
queue=queue,
)
def get_recipients(self, software=None):
@ -195,7 +197,7 @@ class ActivitypubMixin:
class ObjectMixin(ActivitypubMixin):
"""add this mixin for object models that are AP serializable"""
def save(self, *args, created=None, **kwargs):
def save(self, *args, created=None, software=None, priority=MEDIUM, **kwargs):
"""broadcast created/updated/deleted objects as appropriate"""
broadcast = kwargs.get("broadcast", True)
# this bonus kwarg would cause an error in the base save method
@ -219,15 +221,17 @@ class ObjectMixin(ActivitypubMixin):
return
try:
software = None
# do we have a "pure" activitypub version of this for mastodon?
if hasattr(self, "pure_content"):
if software != "bookwyrm" and hasattr(self, "pure_content"):
pure_activity = self.to_create_activity(user, pure=True)
self.broadcast(pure_activity, user, software="other")
self.broadcast(
pure_activity, user, software="other", queue=priority
)
# set bookwyrm so that that type is also sent
software = "bookwyrm"
# sends to BW only if we just did a pure version for masto
activity = self.to_create_activity(user)
self.broadcast(activity, user, software=software)
self.broadcast(activity, user, software=software, queue=priority)
except AttributeError:
# janky as heck, this catches the mutliple inheritence chain
# for boosts and ignores this auxilliary broadcast
@ -241,8 +245,7 @@ class ObjectMixin(ActivitypubMixin):
if isinstance(self, user_model):
user = self
# book data tracks last editor
elif hasattr(self, "last_edited_by"):
user = self.last_edited_by
user = user or getattr(self, "last_edited_by", None)
# again, if we don't know the user or they're remote, don't bother
if not user or not user.local:
return
@ -252,7 +255,7 @@ class ObjectMixin(ActivitypubMixin):
activity = self.to_delete_activity(user)
else:
activity = self.to_update_activity(user)
self.broadcast(activity, user)
self.broadcast(activity, user, queue=priority)
def to_create_activity(self, user, **kwargs):
"""returns the object wrapped in a Create activity"""
@ -375,9 +378,9 @@ class CollectionItemMixin(ActivitypubMixin):
activity_serializer = activitypub.CollectionItem
def broadcast(self, activity, sender, software="bookwyrm"):
def broadcast(self, activity, sender, software="bookwyrm", queue=MEDIUM):
"""only send book collection updates to other bookwyrm instances"""
super().broadcast(activity, sender, software=software)
super().broadcast(activity, sender, software=software, queue=queue)
@property
def privacy(self):
@ -396,7 +399,7 @@ class CollectionItemMixin(ActivitypubMixin):
return []
return [collection_field.user]
def save(self, *args, broadcast=True, **kwargs):
def save(self, *args, broadcast=True, priority=MEDIUM, **kwargs):
"""broadcast updated"""
# first off, we want to save normally no matter what
super().save(*args, **kwargs)
@ -407,7 +410,7 @@ class CollectionItemMixin(ActivitypubMixin):
# adding an obj to the collection
activity = self.to_add_activity(self.user)
self.broadcast(activity, self.user)
self.broadcast(activity, self.user, queue=priority)
def delete(self, *args, broadcast=True, **kwargs):
"""broadcast a remove activity"""
@ -440,12 +443,12 @@ class CollectionItemMixin(ActivitypubMixin):
class ActivityMixin(ActivitypubMixin):
"""add this mixin for models that are AP serializable"""
def save(self, *args, broadcast=True, **kwargs):
def save(self, *args, broadcast=True, priority=MEDIUM, **kwargs):
"""broadcast activity"""
super().save(*args, **kwargs)
user = self.user if hasattr(self, "user") else self.user_subject
if broadcast and user.local:
self.broadcast(self.to_activity(), user)
self.broadcast(self.to_activity(), user, queue=priority)
def delete(self, *args, broadcast=True, **kwargs):
"""nevermind, undo that activity"""
@ -502,7 +505,7 @@ def unfurl_related_field(related_field, sort_field=None):
return related_field.remote_id
@app.task(queue="medium_priority")
@app.task(queue=MEDIUM)
def broadcast_task(sender_id, activity, recipients):
"""the celery task for broadcast"""
user_model = apps.get_model("bookwyrm.User", require_ready=True)

View File

@ -66,9 +66,10 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
self.remote_id = None
return super().save(*args, **kwargs)
def broadcast(self, activity, sender, software="bookwyrm"):
# pylint: disable=arguments-differ
def broadcast(self, activity, sender, software="bookwyrm", **kwargs):
"""only send book data updates to other bookwyrm instances"""
super().broadcast(activity, sender, software=software)
super().broadcast(activity, sender, software=software, **kwargs)
class Book(BookDataModel):

View File

@ -3,6 +3,7 @@ from dataclasses import MISSING
import imghdr
import re
from uuid import uuid4
from urllib.parse import urljoin
import dateutil.parser
from dateutil.parser import ParserError
@ -13,11 +14,12 @@ from django.db import models
from django.forms import ClearableFileInput, ImageField as DjangoImageField
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from django.utils.encoding import filepath_to_uri
from bookwyrm import activitypub
from bookwyrm.connectors import get_image
from bookwyrm.sanitize_html import InputHtmlParser
from bookwyrm.settings import DOMAIN
from bookwyrm.settings import MEDIA_FULL_URL
def validate_remote_id(value):
@ -381,17 +383,6 @@ class CustomImageField(DjangoImageField):
widget = ClearableFileInputWithWarning
def image_serializer(value, alt):
"""helper for serializing images"""
if value and hasattr(value, "url"):
url = value.url
else:
return None
if not url[:4] == "http":
url = f"https://{DOMAIN}{url}"
return activitypub.Document(url=url, name=alt)
class ImageField(ActivitypubFieldMixin, models.ImageField):
"""activitypub-aware image field"""
@ -424,7 +415,12 @@ class ImageField(ActivitypubFieldMixin, models.ImageField):
activity[key] = formatted
def field_to_activity(self, value, alt=None):
return image_serializer(value, alt)
url = get_absolute_url(value)
if not url:
return None
return activitypub.Document(url=url, name=alt)
def field_from_activity(self, value):
image_slug = value
@ -461,6 +457,20 @@ class ImageField(ActivitypubFieldMixin, models.ImageField):
)
def get_absolute_url(value):
"""returns an absolute URL for the image"""
name = getattr(value, "name")
if not name:
return None
url = filepath_to_uri(name)
if url is not None:
url = url.lstrip("/")
url = urljoin(MEDIA_FULL_URL, url)
return url
class DateTimeField(ActivitypubFieldMixin, models.DateTimeField):
"""activitypub-aware datetime field"""

View File

@ -6,20 +6,14 @@ from django.db import models
from django.utils import timezone
from bookwyrm.connectors import connector_manager
from bookwyrm.models import ReadThrough, User, Book
from bookwyrm.models import ReadThrough, User, Book, Edition
from .fields import PrivacyLevels
# Mapping goodreads -> bookwyrm shelf titles.
GOODREADS_SHELVES = {
"read": "read",
"currently-reading": "reading",
"to-read": "to-read",
}
def unquote_string(text):
"""resolve csv quote weirdness"""
if not text:
return None
match = re.match(r'="([^"]*)"', text)
if match:
return match.group(1)
@ -41,14 +35,21 @@ class ImportJob(models.Model):
user = models.ForeignKey(User, on_delete=models.CASCADE)
created_date = models.DateTimeField(default=timezone.now)
task_id = models.CharField(max_length=100, null=True)
updated_date = models.DateTimeField(default=timezone.now)
include_reviews = models.BooleanField(default=True)
mappings = models.JSONField()
complete = models.BooleanField(default=False)
source = models.CharField(max_length=100)
privacy = models.CharField(
max_length=255, default="public", choices=PrivacyLevels.choices
)
retry = models.BooleanField(default=False)
@property
def pending_items(self):
"""items that haven't been processed yet"""
return self.items.filter(fail_reason__isnull=True, book__isnull=True)
class ImportItem(models.Model):
"""a single line of a csv being imported"""
@ -56,6 +57,7 @@ class ImportItem(models.Model):
job = models.ForeignKey(ImportJob, on_delete=models.CASCADE, related_name="items")
index = models.IntegerField()
data = models.JSONField()
normalized_data = models.JSONField()
book = models.ForeignKey(Book, on_delete=models.SET_NULL, null=True, blank=True)
book_guess = models.ForeignKey(
Book,
@ -65,9 +67,26 @@ class ImportItem(models.Model):
related_name="book_guess",
)
fail_reason = models.TextField(null=True)
linked_review = models.ForeignKey(
"Review", on_delete=models.SET_NULL, null=True, blank=True
)
def update_job(self):
"""let the job know when the items get work done"""
job = self.job
job.updated_date = timezone.now()
job.save()
if not job.pending_items.exists() and not job.complete:
job.complete = True
job.save(update_fields=["complete"])
def resolve(self):
"""try various ways to lookup a book"""
# we might be calling this after manually adding the book,
# so no need to do searches
if self.book:
return
if self.isbn:
self.book = self.get_book_from_isbn()
else:
@ -85,6 +104,10 @@ class ImportItem(models.Model):
self.isbn, min_confidence=0.999
)
if search_result:
# it's already in the right format
if isinstance(search_result, Edition):
return search_result
# it's just a search result, book needs to be created
# raises ConnectorException
return search_result.connector.get_or_create_book(search_result.key)
return None
@ -96,6 +119,8 @@ class ImportItem(models.Model):
search_term, min_confidence=0.1
)
if search_result:
if isinstance(search_result, Edition):
return (search_result, 1)
# raises ConnectorException
return (
search_result.connector.get_or_create_book(search_result.key),
@ -106,56 +131,62 @@ class ImportItem(models.Model):
@property
def title(self):
"""get the book title"""
return self.data["Title"]
return self.normalized_data.get("title")
@property
def author(self):
"""get the book title"""
return self.data["Author"]
"""get the book's authors"""
return self.normalized_data.get("authors")
@property
def isbn(self):
"""pulls out the isbn13 field from the csv line data"""
return unquote_string(self.data["ISBN13"])
return unquote_string(self.normalized_data.get("isbn_13")) or unquote_string(
self.normalized_data.get("isbn_10")
)
@property
def shelf(self):
"""the goodreads shelf field"""
if self.data["Exclusive Shelf"]:
return GOODREADS_SHELVES.get(self.data["Exclusive Shelf"])
return None
return self.normalized_data.get("shelf")
@property
def review(self):
"""a user-written review, to be imported with the book data"""
return self.data["My Review"]
return self.normalized_data.get("review_body")
@property
def rating(self):
"""x/5 star rating for a book"""
if self.data.get("My Rating", None):
return int(self.data["My Rating"])
if self.normalized_data.get("rating"):
return float(self.normalized_data.get("rating"))
return None
@property
def date_added(self):
"""when the book was added to this dataset"""
if self.data["Date Added"]:
return timezone.make_aware(dateutil.parser.parse(self.data["Date Added"]))
if self.normalized_data.get("date_added"):
return timezone.make_aware(
dateutil.parser.parse(self.normalized_data.get("date_added"))
)
return None
@property
def date_started(self):
"""when the book was started"""
if "Date Started" in self.data and self.data["Date Started"]:
return timezone.make_aware(dateutil.parser.parse(self.data["Date Started"]))
if self.normalized_data.get("date_started"):
return timezone.make_aware(
dateutil.parser.parse(self.normalized_data.get("date_started"))
)
return None
@property
def date_read(self):
"""the date a book was completed"""
if self.data["Date Read"]:
return timezone.make_aware(dateutil.parser.parse(self.data["Date Read"]))
if self.normalized_data.get("date_finished"):
return timezone.make_aware(
dateutil.parser.parse(self.normalized_data.get("date_finished"))
)
return None
@property
@ -174,7 +205,9 @@ class ImportItem(models.Model):
if start_date and start_date is not None and not self.date_read:
return [ReadThrough(start_date=start_date)]
if self.date_read:
start_date = start_date if start_date < self.date_read else None
start_date = (
start_date if start_date and start_date < self.date_read else None
)
return [
ReadThrough(
start_date=start_date,
@ -185,8 +218,10 @@ class ImportItem(models.Model):
def __repr__(self):
# pylint: disable=consider-using-f-string
return "<{!r}Item {!r}>".format(self.data["import_source"], self.data["Title"])
return "<{!r} Item {!r}>".format(self.index, self.normalized_data.get("title"))
def __str__(self):
# pylint: disable=consider-using-f-string
return "{} by {}".format(self.data["Title"], self.data["Author"])
return "{} by {}".format(
self.normalized_data.get("title"), self.normalized_data.get("authors")
)

View File

@ -157,9 +157,12 @@ def notify_user_on_unboost(sender, instance, *args, **kwargs):
@receiver(models.signals.post_save, sender=ImportJob)
# pylint: disable=unused-argument
def notify_user_on_import_complete(sender, instance, *args, **kwargs):
def notify_user_on_import_complete(
sender, instance, *args, update_fields=None, **kwargs
):
"""we imported your books! aren't you proud of us"""
if not instance.complete:
update_fields = update_fields or []
if not instance.complete or "complete" not in update_fields:
return
Notification.objects.create(
user=instance.user,

View File

@ -1,5 +1,6 @@
""" the particulars for this instance of BookWyrm """
import datetime
from urllib.parse import urljoin
from django.db import models, IntegrityError
from django.dispatch import receiver
@ -7,9 +8,10 @@ from django.utils import timezone
from model_utils import FieldTracker
from bookwyrm.preview_images import generate_site_preview_image_task
from bookwyrm.settings import DOMAIN, ENABLE_PREVIEW_IMAGES
from bookwyrm.settings import DOMAIN, ENABLE_PREVIEW_IMAGES, STATIC_FULL_URL
from .base_model import BookWyrmModel, new_access_code
from .user import User
from .fields import get_absolute_url
class SiteSettings(models.Model):
@ -66,6 +68,28 @@ class SiteSettings(models.Model):
default_settings.save()
return default_settings
@property
def logo_url(self):
"""helper to build the logo url"""
return self.get_url("logo", "images/logo.png")
@property
def logo_small_url(self):
"""helper to build the logo url"""
return self.get_url("logo_small", "images/logo-small.png")
@property
def favicon_url(self):
"""helper to build the logo url"""
return self.get_url("favicon", "images/favicon.png")
def get_url(self, field, default_path):
"""get a media url or a default static path"""
uploaded = getattr(self, field, None)
if uploaded:
return get_absolute_url(uploaded)
return urljoin(STATIC_FULL_URL, default_path)
class SiteInvite(models.Model):
"""gives someone access to create an account on the instance"""

View File

@ -19,7 +19,6 @@ from bookwyrm.settings import ENABLE_PREVIEW_IMAGES
from .activitypub_mixin import ActivitypubMixin, ActivityMixin
from .activitypub_mixin import OrderedCollectionPageMixin
from .base_model import BookWyrmModel
from .fields import image_serializer
from .readthrough import ProgressMode
from . import fields
@ -190,15 +189,26 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel):
if hasattr(activity, "name"):
activity.name = self.pure_name
activity.type = self.pure_type
activity.attachment = [
image_serializer(b.cover, b.alt_text)
for b in self.mention_books.all()[:4]
if b.cover
]
if hasattr(self, "book") and self.book.cover:
activity.attachment.append(
image_serializer(self.book.cover, self.book.alt_text)
)
book = getattr(self, "book", None)
books = [book] if book else []
books += list(self.mention_books.all())
if len(books) == 1 and getattr(books[0], "preview_image", None):
covers = [
activitypub.Document(
url=fields.get_absolute_url(books[0].preview_image),
name=books[0].alt_text,
)
]
else:
covers = [
activitypub.Document(
url=fields.get_absolute_url(b.cover),
name=b.alt_text,
)
for b in books
if b and b.cover
]
activity.attachment = covers
return activity
def to_activity(self, pure=False): # pylint: disable=arguments-differ