Re-thinks connector mappings
This commit is contained in:
parent
d54c8c4dc4
commit
0f579e7d8d
@ -17,26 +17,24 @@ class AbstractConnector(ABC):
|
|||||||
info = models.Connector.objects.get(identifier=identifier)
|
info = models.Connector.objects.get(identifier=identifier)
|
||||||
self.connector = info
|
self.connector = info
|
||||||
|
|
||||||
self.book_mappings = {}
|
self.key_mappings = []
|
||||||
self.key_mappings = {
|
|
||||||
'isbn_13': ('isbn_13', None),
|
|
||||||
'isbn_10': ('isbn_10', None),
|
|
||||||
'oclc_numbers': ('oclc_number', None),
|
|
||||||
'lccn': ('lccn', None),
|
|
||||||
}
|
|
||||||
|
|
||||||
fields = [
|
# fields we want to look for in book data to copy over
|
||||||
|
# title we handle separately.
|
||||||
|
self.book_mappings = []
|
||||||
|
|
||||||
|
# the things in the connector model to copy over
|
||||||
|
self_fields = [
|
||||||
'base_url',
|
'base_url',
|
||||||
'books_url',
|
'books_url',
|
||||||
'covers_url',
|
'covers_url',
|
||||||
'search_url',
|
'search_url',
|
||||||
'key_name',
|
|
||||||
'max_query_count',
|
'max_query_count',
|
||||||
'name',
|
'name',
|
||||||
'identifier',
|
'identifier',
|
||||||
'local'
|
'local'
|
||||||
]
|
]
|
||||||
for field in fields:
|
for field in self_fields:
|
||||||
setattr(self, field, getattr(info, field))
|
setattr(self, field, getattr(info, field))
|
||||||
|
|
||||||
|
|
||||||
@ -85,7 +83,7 @@ class AbstractConnector(ABC):
|
|||||||
if self.is_work_data(data):
|
if self.is_work_data(data):
|
||||||
work_data = data
|
work_data = data
|
||||||
# if we requested a work and there's already an edition, we're set
|
# if we requested a work and there's already an edition, we're set
|
||||||
work = self.match_from_mappings(work_data)
|
work = self.match_from_mappings(work_data, models.Work)
|
||||||
if work and work.default_edition:
|
if work and work.default_edition:
|
||||||
return work.default_edition
|
return work.default_edition
|
||||||
|
|
||||||
@ -98,7 +96,7 @@ class AbstractConnector(ABC):
|
|||||||
edition_data = data
|
edition_data = data
|
||||||
else:
|
else:
|
||||||
edition_data = data
|
edition_data = data
|
||||||
edition = self.match_from_mappings(edition_data)
|
edition = self.match_from_mappings(edition_data, models.Edition)
|
||||||
# no need to figure out about the work if we already know about it
|
# no need to figure out about the work if we already know about it
|
||||||
if edition and edition.parent_work:
|
if edition and edition.parent_work:
|
||||||
return edition
|
return edition
|
||||||
@ -181,35 +179,25 @@ class AbstractConnector(ABC):
|
|||||||
return book
|
return book
|
||||||
|
|
||||||
|
|
||||||
def match_from_mappings(self, data):
|
def match_from_mappings(self, data, model):
|
||||||
''' try to find existing copies of this book using various keys '''
|
''' try to find existing copies of this book using various keys '''
|
||||||
keys = [
|
relevent_mappings = [m for m in self.key_mappings if \
|
||||||
('openlibrary_key', models.Book),
|
m.model and model == m.model]
|
||||||
('librarything_key', models.Book),
|
for mapping in relevent_mappings:
|
||||||
('goodreads_key', models.Book),
|
# check if this field is present in the data
|
||||||
('lccn', models.Work),
|
value = data.get(mapping.remote_field)
|
||||||
('isbn_10', models.Edition),
|
|
||||||
('isbn_13', models.Edition),
|
|
||||||
('oclc_number', models.Edition),
|
|
||||||
('asin', models.Edition),
|
|
||||||
]
|
|
||||||
noop = lambda x: x
|
|
||||||
for key, model in keys:
|
|
||||||
formatter = None
|
|
||||||
if key in self.key_mappings:
|
|
||||||
key, formatter = self.key_mappings[key]
|
|
||||||
if not formatter:
|
|
||||||
formatter = noop
|
|
||||||
|
|
||||||
value = data.get(key)
|
|
||||||
if not value:
|
if not value:
|
||||||
continue
|
continue
|
||||||
value = formatter(value)
|
|
||||||
|
|
||||||
match = model.objects.select_subclasses().filter(
|
# extract the value in the right format
|
||||||
**{key: value}).first()
|
value = mapping.formatter(value)
|
||||||
|
|
||||||
|
# search our database for a matching book
|
||||||
|
kwargs = {mapping.local_field: value}
|
||||||
|
match = model.objects.filter(**kwargs).first()
|
||||||
if match:
|
if match:
|
||||||
return match
|
return match
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
@ -254,23 +242,17 @@ class AbstractConnector(ABC):
|
|||||||
|
|
||||||
def update_from_mappings(obj, data, mappings):
|
def update_from_mappings(obj, data, mappings):
|
||||||
''' assign data to model with mappings '''
|
''' assign data to model with mappings '''
|
||||||
noop = lambda x: x
|
for mapping in mappings:
|
||||||
mappings['authors'] = ('', noop)
|
# check if this field is present in the data
|
||||||
mappings['parent_work'] = ('', noop)
|
value = data.get(mapping.remote_field)
|
||||||
for (key, value) in data.items():
|
if not value:
|
||||||
formatter = None
|
|
||||||
if key in mappings:
|
|
||||||
key, formatter = mappings[key]
|
|
||||||
if not formatter:
|
|
||||||
formatter = noop
|
|
||||||
|
|
||||||
if key == 'id':
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
# extract the value in the right format
|
||||||
hasattr(obj, key)
|
value = mapping.formatter(value)
|
||||||
except ValueError:
|
|
||||||
obj.__setattr__(key, formatter(value))
|
# assign the formatted value to the model
|
||||||
|
obj.__setattr__(mapping.local_field, value)
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
@ -315,3 +297,15 @@ class SearchResult(object):
|
|||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<SearchResult key={!r} title={!r} author={!r}>".format(
|
return "<SearchResult key={!r} title={!r} author={!r}>".format(
|
||||||
self.key, self.title, self.author)
|
self.key, self.title, self.author)
|
||||||
|
|
||||||
|
|
||||||
|
class Mapping(object):
    ''' associate a local database field with a field in an external dataset

    local_field: name of the attribute/column on the local model; used as
        the lookup key when matching and as the attribute that gets set
        when updating an object from remote data.
    remote_field: key to read from the remote data dict. Falls back to
        local_field when not given (or empty).
    formatter: callable applied to the remote value before it is used.
        Falls back to an identity function when not given.
    model: optional model class this mapping applies to; mappings without
        a model are never selected when matching by model.
    '''
    def __init__(
            self, local_field, remote_field=None, formatter=None, model=None):
        self.local_field = local_field
        # most remote fields share the local name, so only differences
        # need to be spelled out by the caller
        self.remote_field = remote_field or local_field
        # default to passing the remote value through untouched
        self.formatter = formatter or (lambda value: value)
        self.model = model

    def __repr__(self):
        return "<Mapping local_field={!r} remote_field={!r} model={!r}>".format(
            self.local_field, self.remote_field, self.model)
|
||||||
|
@ -6,7 +6,7 @@ from django.core.files.base import ContentFile
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
from fedireads import models
|
from fedireads import models
|
||||||
from .abstract_connector import AbstractConnector, SearchResult
|
from .abstract_connector import AbstractConnector, SearchResult, Mapping
|
||||||
from .abstract_connector import update_from_mappings, get_date, get_data
|
from .abstract_connector import update_from_mappings, get_date, get_data
|
||||||
|
|
||||||
|
|
||||||
@ -14,11 +14,37 @@ class Connector(AbstractConnector):
|
|||||||
''' interact with other instances '''
|
''' interact with other instances '''
|
||||||
def __init__(self, identifier):
|
def __init__(self, identifier):
|
||||||
super().__init__(identifier)
|
super().__init__(identifier)
|
||||||
self.book_mappings = self.key_mappings.copy()
|
self.key_mappings = [
|
||||||
self.book_mappings.update({
|
Mapping('isbn_13', model=models.Edition),
|
||||||
'published_date': ('published_date', get_date),
|
Mapping('isbn_10', model=models.Edition),
|
||||||
'first_published_date': ('first_published_date', get_date),
|
Mapping('lccn', model=models.Work),
|
||||||
})
|
Mapping('oclc_number', model=models.Edition),
|
||||||
|
Mapping('openlibrary_key'),
|
||||||
|
Mapping('goodreads_key'),
|
||||||
|
Mapping('asin'),
|
||||||
|
]
|
||||||
|
|
||||||
|
self.book_mappings = self.key_mappings + [
|
||||||
|
Mapping('sort_title'),
|
||||||
|
Mapping('subtitle'),
|
||||||
|
Mapping('description'),
|
||||||
|
Mapping('languages'),
|
||||||
|
Mapping('series'),
|
||||||
|
Mapping('series_number'),
|
||||||
|
Mapping('subjects'),
|
||||||
|
Mapping('subject_places'),
|
||||||
|
Mapping('first_published_date'),
|
||||||
|
Mapping('published_date'),
|
||||||
|
Mapping('pages'),
|
||||||
|
Mapping('physical_format'),
|
||||||
|
Mapping('publishers'),
|
||||||
|
]
|
||||||
|
|
||||||
|
self.author_mappings = [
|
||||||
|
Mapping('born', remote_field='birth_date', formatter=get_date),
|
||||||
|
Mapping('died', remote_field='death_date', formatter=get_date),
|
||||||
|
Mapping('bio'),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def is_work_data(self, data):
|
def is_work_data(self, data):
|
||||||
@ -63,11 +89,7 @@ class Connector(AbstractConnector):
|
|||||||
|
|
||||||
# ingest a new author
|
# ingest a new author
|
||||||
author = models.Author(remote_id=remote_id)
|
author = models.Author(remote_id=remote_id)
|
||||||
mappings = {
|
author = update_from_mappings(author, data, self.author_mappings)
|
||||||
'born': ('born', get_date),
|
|
||||||
'died': ('died', get_date),
|
|
||||||
}
|
|
||||||
author = update_from_mappings(author, data, mappings)
|
|
||||||
author.save()
|
author.save()
|
||||||
|
|
||||||
return author
|
return author
|
||||||
|
@ -5,7 +5,7 @@ import requests
|
|||||||
from django.core.files.base import ContentFile
|
from django.core.files.base import ContentFile
|
||||||
|
|
||||||
from fedireads import models
|
from fedireads import models
|
||||||
from .abstract_connector import AbstractConnector, SearchResult
|
from .abstract_connector import AbstractConnector, SearchResult, Mapping
|
||||||
from .abstract_connector import update_from_mappings
|
from .abstract_connector import update_from_mappings
|
||||||
from .abstract_connector import get_date, get_data
|
from .abstract_connector import get_date, get_data
|
||||||
from .openlibrary_languages import languages
|
from .openlibrary_languages import languages
|
||||||
@ -15,23 +15,61 @@ class Connector(AbstractConnector):
|
|||||||
''' instantiate a connector for OL '''
|
''' instantiate a connector for OL '''
|
||||||
def __init__(self, identifier):
|
def __init__(self, identifier):
|
||||||
super().__init__(identifier)
|
super().__init__(identifier)
|
||||||
get_first = lambda a: a[0]
|
|
||||||
self.key_mappings = {
|
|
||||||
'isbn_13': ('isbn_13', get_first),
|
|
||||||
'isbn_10': ('isbn_10', get_first),
|
|
||||||
'oclc_numbers': ('oclc_number', get_first),
|
|
||||||
'lccn': ('lccn', get_first),
|
|
||||||
}
|
|
||||||
|
|
||||||
self.book_mappings = self.key_mappings.copy()
|
get_first = lambda a: a[0]
|
||||||
self.book_mappings.update({
|
self.key_mappings = [
|
||||||
'publish_date': ('published_date', get_date),
|
Mapping('isbn_13', model=models.Edition, formatter=get_first),
|
||||||
'first_publish_date': ('first_published_date', get_date),
|
Mapping('isbn_10', model=models.Edition, formatter=get_first),
|
||||||
'description': ('description', get_description),
|
Mapping('lccn', model=models.Work, formatter=get_first),
|
||||||
'languages': ('languages', get_languages),
|
Mapping(
|
||||||
'number_of_pages': ('pages', None),
|
'oclc_number',
|
||||||
'series': ('series', get_first),
|
remote_field='oclc_numbers',
|
||||||
})
|
model=models.Edition,
|
||||||
|
formatter=get_first
|
||||||
|
),
|
||||||
|
Mapping(
|
||||||
|
'openlibrary_key',
|
||||||
|
remote_field='key',
|
||||||
|
formatter=get_openlibrary_key
|
||||||
|
),
|
||||||
|
Mapping('goodreads_key'),
|
||||||
|
Mapping('asin'),
|
||||||
|
]
|
||||||
|
|
||||||
|
self.book_mappings = self.key_mappings + [
|
||||||
|
Mapping('sort_title'),
|
||||||
|
Mapping('subtitle'),
|
||||||
|
Mapping('description', formatter=get_description),
|
||||||
|
Mapping('languages', formatter=get_languages),
|
||||||
|
Mapping('series', formatter=get_first),
|
||||||
|
Mapping('series_number'),
|
||||||
|
Mapping('subjects'),
|
||||||
|
Mapping('subject_places'),
|
||||||
|
Mapping(
|
||||||
|
'first_published_date',
|
||||||
|
remote_field='first_publish_date',
|
||||||
|
formatter=get_date
|
||||||
|
),
|
||||||
|
Mapping(
|
||||||
|
'published_date',
|
||||||
|
remote_field='publish_date',
|
||||||
|
formatter=get_date
|
||||||
|
),
|
||||||
|
Mapping(
|
||||||
|
'pages',
|
||||||
|
model=models.Edition,
|
||||||
|
remote_field='number_of_pages'
|
||||||
|
),
|
||||||
|
Mapping('physical_format', model=models.Edition),
|
||||||
|
Mapping('publishers'),
|
||||||
|
]
|
||||||
|
|
||||||
|
self.author_mappings = [
|
||||||
|
Mapping('born', remote_field='birth_date', formatter=get_date),
|
||||||
|
Mapping('died', remote_field='death_date', formatter=get_date),
|
||||||
|
Mapping('bio', formatter=get_description),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def is_work_data(self, data):
|
def is_work_data(self, data):
|
||||||
@ -133,12 +171,7 @@ class Connector(AbstractConnector):
|
|||||||
data = get_data(url)
|
data = get_data(url)
|
||||||
|
|
||||||
author = models.Author(openlibrary_key=olkey)
|
author = models.Author(openlibrary_key=olkey)
|
||||||
mappings = {
|
author = update_from_mappings(author, data, self.author_mappings)
|
||||||
'birth_date': ('born', get_date),
|
|
||||||
'death_date': ('died', get_date),
|
|
||||||
'bio': ('bio', get_description),
|
|
||||||
}
|
|
||||||
author = update_from_mappings(author, data, mappings)
|
|
||||||
name = data.get('name')
|
name = data.get('name')
|
||||||
# TODO this is making some BOLD assumption
|
# TODO this is making some BOLD assumption
|
||||||
if name:
|
if name:
|
||||||
@ -156,6 +189,11 @@ def get_description(description_blob):
|
|||||||
return description_blob
|
return description_blob
|
||||||
|
|
||||||
|
|
||||||
|
def get_openlibrary_key(key):
    ''' convert /books/OL27320736M into OL27320736M

    Returns the last path segment of `key`. A trailing slash is ignored so
    that "/books/OL27320736M/" does not produce an empty key.
    '''
    return key.rstrip('/').split('/')[-1]
|
||||||
|
|
||||||
|
|
||||||
def get_languages(language_blob):
|
def get_languages(language_blob):
|
||||||
''' /language/eng -> English '''
|
''' /language/eng -> English '''
|
||||||
langs = []
|
langs = []
|
||||||
|
21
fedireads/migrations/0039_auto_20200510_2342.py
Normal file
21
fedireads/migrations/0039_auto_20200510_2342.py
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# Generated by Django 3.0.3 on 2020-05-10 23:42
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    ''' remove the misc_identifiers field from book and key_name from
    connector '''

    # must run after the migration that added author.remote_id
    dependencies = [
        ('fedireads', '0038_author_remote_id'),
    ]

    operations = [
        migrations.RemoveField(
            model_name='book',
            name='misc_identifiers',
        ),
        migrations.RemoveField(
            model_name='connector',
            name='key_name',
        ),
    ]
|
@ -29,8 +29,6 @@ class Connector(FedireadsModel):
|
|||||||
covers_url = models.CharField(max_length=255)
|
covers_url = models.CharField(max_length=255)
|
||||||
search_url = models.CharField(max_length=255, null=True)
|
search_url = models.CharField(max_length=255, null=True)
|
||||||
|
|
||||||
key_name = models.CharField(max_length=255)
|
|
||||||
|
|
||||||
politeness_delay = models.IntegerField(null=True) #seconds
|
politeness_delay = models.IntegerField(null=True) #seconds
|
||||||
max_query_count = models.IntegerField(null=True)
|
max_query_count = models.IntegerField(null=True)
|
||||||
# how many queries executed in a unit of time, like a day
|
# how many queries executed in a unit of time, like a day
|
||||||
@ -54,7 +52,6 @@ class Book(FedireadsModel):
|
|||||||
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
|
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
|
||||||
librarything_key = models.CharField(max_length=255, blank=True, null=True)
|
librarything_key = models.CharField(max_length=255, blank=True, null=True)
|
||||||
goodreads_key = models.CharField(max_length=255, blank=True, null=True)
|
goodreads_key = models.CharField(max_length=255, blank=True, null=True)
|
||||||
misc_identifiers = JSONField(null=True)
|
|
||||||
|
|
||||||
# info about where the data comes from and where/if to sync
|
# info about where the data comes from and where/if to sync
|
||||||
sync = models.BooleanField(default=True)
|
sync = models.BooleanField(default=True)
|
||||||
|
@ -18,7 +18,6 @@ class FedireadsConnector(TestCase):
|
|||||||
books_url='https://example.com',
|
books_url='https://example.com',
|
||||||
covers_url='https://example.com/images/covers',
|
covers_url='https://example.com/images/covers',
|
||||||
search_url='https://example.com/search?q=',
|
search_url='https://example.com/search?q=',
|
||||||
key_name='remote_id',
|
|
||||||
)
|
)
|
||||||
self.connector = Connector('example.com')
|
self.connector = Connector('example.com')
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@ import pytz
|
|||||||
from fedireads import models
|
from fedireads import models
|
||||||
from fedireads.connectors.openlibrary import Connector
|
from fedireads.connectors.openlibrary import Connector
|
||||||
from fedireads.connectors.openlibrary import get_languages, get_description
|
from fedireads.connectors.openlibrary import get_languages, get_description
|
||||||
from fedireads.connectors.openlibrary import pick_default_edition
|
from fedireads.connectors.openlibrary import pick_default_edition, get_openlibrary_key
|
||||||
from fedireads.connectors.abstract_connector import SearchResult, get_date
|
from fedireads.connectors.abstract_connector import SearchResult, get_date
|
||||||
|
|
||||||
|
|
||||||
@ -22,7 +22,6 @@ class Openlibrary(TestCase):
|
|||||||
books_url='https://openlibrary.org',
|
books_url='https://openlibrary.org',
|
||||||
covers_url='https://covers.openlibrary.org',
|
covers_url='https://covers.openlibrary.org',
|
||||||
search_url='https://openlibrary.org/search?q=',
|
search_url='https://openlibrary.org/search?q=',
|
||||||
key_name='openlibrary_key',
|
|
||||||
)
|
)
|
||||||
self.connector = Connector('openlibrary.org')
|
self.connector = Connector('openlibrary.org')
|
||||||
|
|
||||||
@ -77,3 +76,9 @@ class Openlibrary(TestCase):
|
|||||||
def test_get_languages(self):
|
def test_get_languages(self):
|
||||||
languages = get_languages(self.edition_data['languages'])
|
languages = get_languages(self.edition_data['languages'])
|
||||||
self.assertEqual(languages, ['English'])
|
self.assertEqual(languages, ['English'])
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_ol_key(self):
    ''' the key is the final segment of an openlibrary path '''
    key = get_openlibrary_key('/books/OL27320736M')
    self.assertEqual(key, 'OL27320736M')
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user