move some code around for readability
This commit is contained in:
parent
c5a3e57bb4
commit
1246042c12
|
@ -5,6 +5,44 @@ from django.db.models import Count
|
||||||
from bookwyrm import models
|
from bookwyrm import models
|
||||||
|
|
||||||
|
|
||||||
|
def update_related(canonical, obj):
    ''' update all the models with fk to the object being removed

    Walks every reverse relation listed in ``canonical._meta.related_objects``
    and, for each related row that currently references ``obj``, points it at
    ``canonical`` instead.

    canonical: the object that is being kept
    obj: the duplicate whose references are being moved onto ``canonical``
    '''
    # (name of the field on the related model, related model class) pairs
    related_models = [
        (r.remote_field.name, r.related_model)
        for r in canonical._meta.related_objects
    ]
    for (related_field, related_model) in related_models:
        related_objs = related_model.objects.filter(**{related_field: obj})
        for related_obj in related_objs:
            print(
                'replacing in',
                related_model.__name__,
                related_field,
                related_obj.id
            )
            try:
                # only the assignment is guarded: a TypeError raised while
                # saving is a real failure and must not be mistaken for the
                # "cannot assign directly" case handled below
                setattr(related_obj, related_field, canonical)
            except TypeError:
                # direct assignment was refused, so the attribute is treated
                # as a collection (presumably a many-to-many manager) and the
                # swap is done through add/remove instead
                relation = getattr(related_obj, related_field)
                relation.add(canonical)
                relation.remove(obj)
            else:
                related_obj.save()
|
||||||
|
|
||||||
|
|
||||||
|
def copy_data(canonical, obj):
    ''' fill empty fields on the kept object with the duplicate's values '''
    # only fields that carry an `activitypub_field` attribute are considered
    candidate_fields = (
        field for field in obj._meta.get_fields()
        if hasattr(field, 'activitypub_field')
    )
    for field in candidate_fields:
        field_name = field.name
        incoming = getattr(obj, field_name)
        # copy a value only when the duplicate has one and canonical does not
        if incoming and not getattr(canonical, field_name):
            print('setting data field', field_name, incoming)
            setattr(canonical, field_name, incoming)
    canonical.save()
|
||||||
|
|
||||||
|
|
||||||
def dedupe_model(model):
|
def dedupe_model(model):
|
||||||
''' combine duplicate editions and update related models '''
|
''' combine duplicate editions and update related models '''
|
||||||
fields = model._meta.get_fields()
|
fields = model._meta.get_fields()
|
||||||
|
@ -25,42 +63,19 @@ def dedupe_model(model):
|
||||||
**{field.name: value}
|
**{field.name: value}
|
||||||
).order_by('id')
|
).order_by('id')
|
||||||
canonical = objs.first()
|
canonical = objs.first()
|
||||||
print('keeping', canonical.remote_id, canonical.id)
|
print('keeping', canonical.remote_id)
|
||||||
for obj in objs[1:]:
|
for obj in objs[1:]:
|
||||||
print(obj.remote_id, obj.id)
|
print(obj.remote_id)
|
||||||
# try to get the most data possible
|
copy_data(canonical, obj)
|
||||||
for data_field in obj._meta.get_fields():
|
update_related(canonical, obj)
|
||||||
if not hasattr(data_field, 'activitypub_field'):
|
# remove the outdated entry
|
||||||
continue
|
|
||||||
data_value = getattr(obj, data_field.name)
|
|
||||||
if not data_value:
|
|
||||||
continue
|
|
||||||
if not getattr(canonical, data_field.name):
|
|
||||||
print('setting data field', data_field.name, data_value)
|
|
||||||
setattr(canonical, data_field.name, data_value)
|
|
||||||
canonical.save()
|
|
||||||
|
|
||||||
# move related models to canonical
|
|
||||||
related_models = [
|
|
||||||
(r.remote_field.name, r.related_model) for r in \
|
|
||||||
canonical._meta.related_objects]
|
|
||||||
for (related_field, related_model) in related_models:
|
|
||||||
related_objs = related_model.objects.filter(
|
|
||||||
**{related_field: obj})
|
|
||||||
for related_obj in related_objs:
|
|
||||||
print(
|
|
||||||
'replacing in', related_model.__name__, related_obj)
|
|
||||||
try:
|
|
||||||
setattr(related_obj, related_field, canonical)
|
|
||||||
related_obj.save()
|
|
||||||
except TypeError:
|
|
||||||
getattr(related_obj, related_field).add(canonical)
|
|
||||||
getattr(related_obj, related_field).remove(obj)
|
|
||||||
obj.delete()
|
obj.delete()
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
    ''' deduplicate all the book data models '''
    help = 'merges duplicate book data'

    # pylint: disable=no-self-use,unused-argument
    def handle(self, *args, **options):
        ''' run the deduplication for editions '''
        dedupe_model(models.Edition)
|
||||||
|
|
Loading…
Reference in New Issue