Adds deduplication fields
This commit is contained in:
@ -12,11 +12,11 @@ from . import fields
|
||||
class Author(ActivitypubMixin, BookWyrmModel):
|
||||
''' basic biographic info '''
|
||||
origin_id = models.CharField(max_length=255, null=True)
|
||||
''' copy of an author from OL '''
|
||||
openlibrary_key = fields.CharField(max_length=255, blank=True, null=True)
|
||||
openlibrary_key = fields.CharField(
|
||||
max_length=255, blank=True, null=True, deduplication_field=True)
|
||||
sync = models.BooleanField(default=True)
|
||||
last_sync_date = models.DateTimeField(default=timezone.now)
|
||||
wikipedia_link = fields.CharField(max_length=255, blank=True, null=True)
|
||||
wikipedia_link = fields.CharField(max_length=255, blank=True, null=True, deduplication_field=True)
|
||||
# idk probably other keys would be useful here?
|
||||
born = fields.DateTimeField(blank=True, null=True)
|
||||
died = fields.DateTimeField(blank=True, null=True)
|
||||
|
@ -1,5 +1,7 @@
|
||||
''' base model with default fields '''
|
||||
from base64 import b64encode
|
||||
from functools import reduce
|
||||
import operator
|
||||
from uuid import uuid4
|
||||
|
||||
from Crypto.PublicKey import RSA
|
||||
@ -7,6 +9,7 @@ from Crypto.Signature import pkcs1_15
|
||||
from Crypto.Hash import SHA256
|
||||
from django.core.paginator import Paginator
|
||||
from django.db import models
|
||||
from django.db.models import Q
|
||||
from django.dispatch import receiver
|
||||
|
||||
from bookwyrm import activitypub
|
||||
@ -64,6 +67,50 @@ class ActivitypubMixin:
|
||||
activity_serializer = lambda: {}
|
||||
reverse_unfurl = False
|
||||
|
||||
@classmethod
|
||||
def find_existing_by_remote_id(cls, remote_id):
|
||||
''' look up a remote id in the db '''
|
||||
return cls.find_existing({'id': remote_id})
|
||||
|
||||
@classmethod
|
||||
def find_existing(cls, data):
|
||||
''' compare data to fields that can be used for deduplation.
|
||||
This always includes remote_id, but can also be unique identifiers
|
||||
like an isbn for an edition '''
|
||||
filters = []
|
||||
for field in cls._meta.get_fields():
|
||||
if not hasattr(field, 'deduplication_field') or \
|
||||
not field.deduplication_field:
|
||||
continue
|
||||
|
||||
value = data.get(field.activitypub_field)
|
||||
if not value:
|
||||
continue
|
||||
filters.append({field.name: value})
|
||||
|
||||
if hasattr(cls, 'origin_id') and 'id' in data:
|
||||
# kinda janky, but this handles special case for books
|
||||
filters.append({'origin_id': data['id']})
|
||||
|
||||
if not filters:
|
||||
# if there are no deduplication fields, it will match the first
|
||||
# item no matter what. this shouldn't happen but just in case.
|
||||
return None
|
||||
|
||||
objects = cls.objects
|
||||
if hasattr(objects, 'select_subclasses'):
|
||||
objects = objects.select_subclasses()
|
||||
|
||||
# an OR operation on all the match fields
|
||||
match = objects.filter(
|
||||
reduce(
|
||||
operator.or_, (Q(**f) for f in filters)
|
||||
)
|
||||
)
|
||||
# there OUGHT to be only one match
|
||||
return match.first()
|
||||
|
||||
|
||||
def to_activity(self):
|
||||
''' convert from a model to an activity '''
|
||||
activity = {}
|
||||
|
@ -16,9 +16,12 @@ class Book(ActivitypubMixin, BookWyrmModel):
|
||||
''' a generic book, which can mean either an edition or a work '''
|
||||
origin_id = models.CharField(max_length=255, null=True, blank=True)
|
||||
# these identifiers apply to both works and editions
|
||||
openlibrary_key = fields.CharField(max_length=255, blank=True, null=True)
|
||||
librarything_key = fields.CharField(max_length=255, blank=True, null=True)
|
||||
goodreads_key = fields.CharField(max_length=255, blank=True, null=True)
|
||||
openlibrary_key = fields.CharField(
|
||||
max_length=255, blank=True, null=True, deduplication_field=True)
|
||||
librarything_key = fields.CharField(
|
||||
max_length=255, blank=True, null=True, deduplication_field=True)
|
||||
goodreads_key = fields.CharField(
|
||||
max_length=255, blank=True, null=True, deduplication_field=True)
|
||||
|
||||
# info about where the data comes from and where/if to sync
|
||||
sync = models.BooleanField(default=True)
|
||||
@ -83,7 +86,8 @@ class Book(ActivitypubMixin, BookWyrmModel):
|
||||
class Work(OrderedCollectionPageMixin, Book):
|
||||
''' a work (an abstract concept of a book that manifests in an edition) '''
|
||||
# library of congress catalog control number
|
||||
lccn = fields.CharField(max_length=255, blank=True, null=True)
|
||||
lccn = fields.CharField(
|
||||
max_length=255, blank=True, null=True, deduplication_field=True)
|
||||
# this has to be nullable but should never be null
|
||||
default_edition = fields.ForeignKey(
|
||||
'Edition',
|
||||
@ -103,10 +107,14 @@ class Work(OrderedCollectionPageMixin, Book):
|
||||
class Edition(Book):
|
||||
''' an edition of a book '''
|
||||
# these identifiers only apply to editions, not works
|
||||
isbn_10 = fields.CharField(max_length=255, blank=True, null=True)
|
||||
isbn_13 = fields.CharField(max_length=255, blank=True, null=True)
|
||||
oclc_number = fields.CharField(max_length=255, blank=True, null=True)
|
||||
asin = fields.CharField(max_length=255, blank=True, null=True)
|
||||
isbn_10 = fields.CharField(
|
||||
max_length=255, blank=True, null=True, deduplication_field=True)
|
||||
isbn_13 = fields.CharField(
|
||||
max_length=255, blank=True, null=True, deduplication_field=True)
|
||||
oclc_number = fields.CharField(
|
||||
max_length=255, blank=True, null=True, deduplication_field=True)
|
||||
asin = fields.CharField(
|
||||
max_length=255, blank=True, null=True, deduplication_field=True)
|
||||
pages = fields.IntegerField(blank=True, null=True)
|
||||
physical_format = fields.CharField(max_length=255, blank=True, null=True)
|
||||
publishers = fields.ArrayField(
|
||||
|
@ -28,7 +28,9 @@ def validate_remote_id(value):
|
||||
class ActivitypubFieldMixin:
|
||||
''' make a database field serializable '''
|
||||
def __init__(self, *args, \
|
||||
activitypub_field=None, activitypub_wrapper=None, **kwargs):
|
||||
activitypub_field=None, activitypub_wrapper=None,
|
||||
deduplication_field=False, **kwargs):
|
||||
self.deduplication_field = deduplication_field
|
||||
if activitypub_wrapper:
|
||||
self.activitypub_wrapper = activitypub_field
|
||||
self.activitypub_field = activitypub_wrapper
|
||||
@ -86,6 +88,8 @@ class RemoteIdField(ActivitypubFieldMixin, models.CharField):
|
||||
*args, max_length=max_length, validators=validators,
|
||||
**kwargs
|
||||
)
|
||||
# for this field, the default is true. false everywhere else.
|
||||
self.deduplication_field = kwargs.get('deduplication_field', True)
|
||||
|
||||
|
||||
class UsernameField(ActivitypubFieldMixin, models.CharField):
|
||||
|
@ -32,7 +32,9 @@ class User(OrderedCollectionPageMixin, AbstractUser):
|
||||
inbox = fields.RemoteIdField(unique=True)
|
||||
shared_inbox = fields.RemoteIdField(
|
||||
activitypub_field='sharedInbox',
|
||||
activitypub_wrapper='endpoints', null=True)
|
||||
activitypub_wrapper='endpoints',
|
||||
deduplication_field=False,
|
||||
null=True)
|
||||
federated_server = models.ForeignKey(
|
||||
'FederatedServer',
|
||||
on_delete=models.PROTECT,
|
||||
|
Reference in New Issue
Block a user