Re-thinks connector mappings
		| @@ -17,26 +17,24 @@ class AbstractConnector(ABC): | ||||
|         info = models.Connector.objects.get(identifier=identifier) | ||||
|         self.connector = info | ||||
|  | ||||
|         self.book_mappings = {} | ||||
|         self.key_mappings = { | ||||
|             'isbn_13': ('isbn_13', None), | ||||
|             'isbn_10': ('isbn_10', None), | ||||
|             'oclc_numbers': ('oclc_number', None), | ||||
|             'lccn': ('lccn', None), | ||||
|         } | ||||
|         self.key_mappings = [] | ||||
|  | ||||
|         fields = [ | ||||
|         # fields we want to look for in book data to copy over | ||||
|         # title we handle separately. | ||||
|         self.book_mappings = [] | ||||
|  | ||||
|         # the things in the connector model to copy over | ||||
|         self_fields = [ | ||||
|             'base_url', | ||||
|             'books_url', | ||||
|             'covers_url', | ||||
|             'search_url', | ||||
|             'key_name', | ||||
|             'max_query_count', | ||||
|             'name', | ||||
|             'identifier', | ||||
|             'local' | ||||
|         ] | ||||
|         for field in fields: | ||||
|         for field in self_fields: | ||||
|             setattr(self, field, getattr(info, field)) | ||||
|  | ||||
|  | ||||
| @@ -85,7 +83,7 @@ class AbstractConnector(ABC): | ||||
|         if self.is_work_data(data): | ||||
|             work_data = data | ||||
|             # if we requested a work and there's already an edition, we're set | ||||
|             work = self.match_from_mappings(work_data) | ||||
|             work = self.match_from_mappings(work_data, models.Work) | ||||
|             if work and work.default_edition: | ||||
|                 return work.default_edition | ||||
|  | ||||
| @@ -98,7 +96,7 @@ class AbstractConnector(ABC): | ||||
|                 edition_data = data | ||||
|         else: | ||||
|             edition_data = data | ||||
|             edition = self.match_from_mappings(edition_data) | ||||
|             edition = self.match_from_mappings(edition_data, models.Edition) | ||||
|             # no need to figure out about the work if we already know about it | ||||
|             if edition and edition.parent_work: | ||||
|                 return edition | ||||
| @@ -181,35 +179,25 @@ class AbstractConnector(ABC): | ||||
|         return book | ||||
|  | ||||
|  | ||||
|     def match_from_mappings(self, data): | ||||
|     def match_from_mappings(self, data, model): | ||||
|         ''' try to find existing copies of this book using various keys ''' | ||||
|         keys = [ | ||||
|             ('openlibrary_key', models.Book), | ||||
|             ('librarything_key', models.Book), | ||||
|             ('goodreads_key', models.Book), | ||||
|             ('lccn', models.Work), | ||||
|             ('isbn_10', models.Edition), | ||||
|             ('isbn_13', models.Edition), | ||||
|             ('oclc_number', models.Edition), | ||||
|             ('asin', models.Edition), | ||||
|         ] | ||||
|         noop = lambda x: x | ||||
|         for key, model in keys: | ||||
|             formatter = None | ||||
|             if key in self.key_mappings: | ||||
|                 key, formatter = self.key_mappings[key] | ||||
|             if not formatter: | ||||
|                 formatter = noop | ||||
|  | ||||
|             value = data.get(key) | ||||
|         relevant_mappings = [m for m in self.key_mappings | ||||
|                 if m.model and model == m.model] | ||||
|         for mapping in relevant_mappings: | ||||
|             # check if this field is present in the data | ||||
|             value = data.get(mapping.remote_field) | ||||
|             if not value: | ||||
|                 continue | ||||
|             value = formatter(value) | ||||
|  | ||||
|             match = model.objects.select_subclasses().filter( | ||||
|                 **{key: value}).first() | ||||
|             # extract the value in the right format | ||||
|             value = mapping.formatter(value) | ||||
|  | ||||
|             # search our database for a matching book | ||||
|             kwargs = {mapping.local_field: value} | ||||
|             match = model.objects.filter(**kwargs).first() | ||||
|             if match: | ||||
|                 return match | ||||
|         return None | ||||
|  | ||||
|  | ||||
|     @abstractmethod | ||||
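
For illustration only (not part of this commit), here is a standalone sketch of the matching loop above, with a plain list of dicts standing in for Django's ORM. The Mapping stand-in mirrors the class added later in this change, and the sample ISBN and rows are invented.

    class Mapping:
        ''' stand-in for the Mapping class introduced further down '''
        def __init__(self, local_field, remote_field=None, formatter=None, model=None):
            self.local_field = local_field
            self.remote_field = remote_field or local_field
            self.formatter = formatter or (lambda x: x)
            self.model = model

    def match_from_mappings(data, model, key_mappings, rows):
        ''' same algorithm as above, minus the ORM: only mappings tied to
        the requested model are tried, in declaration order '''
        relevant = [m for m in key_mappings if m.model and model == m.model]
        for mapping in relevant:
            value = data.get(mapping.remote_field)
            if not value:
                continue
            value = mapping.formatter(value)
            # stands in for model.objects.filter(**{mapping.local_field: value}).first()
            match = next((r for r in rows if r.get(mapping.local_field) == value), None)
            if match:
                return match
        return None

    key_mappings = [Mapping('isbn_13', model='Edition', formatter=lambda a: a[0])]
    rows = [{'isbn_13': '9780000000002', 'title': 'A Known Edition'}]
    print(match_from_mappings({'isbn_13': ['9780000000002']}, 'Edition', key_mappings, rows))
    # -> {'isbn_13': '9780000000002', 'title': 'A Known Edition'}
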
| @@ -254,23 +242,17 @@ class AbstractConnector(ABC): | ||||
|  | ||||
| def update_from_mappings(obj, data, mappings): | ||||
|     ''' assign data to model with mappings ''' | ||||
|     noop = lambda x: x | ||||
|     mappings['authors'] = ('', noop) | ||||
|     mappings['parent_work'] = ('', noop) | ||||
|     for (key, value) in data.items(): | ||||
|         formatter = None | ||||
|         if key in mappings: | ||||
|             key, formatter = mappings[key] | ||||
|         if not formatter: | ||||
|             formatter = noop | ||||
|  | ||||
|         if key == 'id': | ||||
|     for mapping in mappings: | ||||
|         # check if this field is present in the data | ||||
|         value = data.get(mapping.remote_field) | ||||
|         if not value: | ||||
|             continue | ||||
|  | ||||
|         try: | ||||
|             hasattr(obj, key) | ||||
|         except ValueError: | ||||
|             obj.__setattr__(key, formatter(value)) | ||||
|         # extract the value in the right format | ||||
|         value = mapping.formatter(value) | ||||
|  | ||||
|         # assign the formatted value to the model | ||||
|         obj.__setattr__(mapping.local_field, value) | ||||
|     return obj | ||||
|  | ||||
|  | ||||
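
A rough usage sketch (again, not from the diff): because the loop is now driven by the mappings rather than by the keys in the incoming data, fields a connector never declared are simply ignored. The stub object, mappings, and data below are hypothetical; the import assumes the module path used elsewhere in this change, and the Mapping class appears a few lines further down in this file.

    from fedireads.connectors.abstract_connector import update_from_mappings, Mapping

    class Stub:
        ''' any object with settable attributes works; a Django model is the usual target '''
        pass

    mappings = [
        Mapping('born', remote_field='birth_date'),
        Mapping('bio'),
    ]
    data = {'birth_date': '1920-01-02', 'bio': 'An example bio.', 'unrelated': 'ignored'}

    obj = update_from_mappings(Stub(), data, mappings)
    # obj.born == '1920-01-02', obj.bio == 'An example bio.'; 'unrelated' is never touched
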
| @@ -315,3 +297,15 @@ class SearchResult(object): | ||||
|     def __repr__(self): | ||||
|         return "<SearchResult key={!r} title={!r} author={!r}>".format( | ||||
|             self.key, self.title, self.author) | ||||
|  | ||||
|  | ||||
| class Mapping(object): | ||||
|     ''' associate a local database field with a field in an external dataset ''' | ||||
|     def __init__( | ||||
|             self, local_field, remote_field=None, formatter=None, model=None): | ||||
|         noop = lambda x: x | ||||
|  | ||||
|         self.local_field = local_field | ||||
|         self.remote_field = remote_field or local_field | ||||
|         self.formatter = formatter or noop | ||||
|         self.model = model | ||||
|   | ||||
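
A quick look at the defaults the class above provides (illustrative, assuming it is importable as shown): remote_field falls back to the local field name and the formatter defaults to a no-op, so the common case stays a one-argument call.

    from fedireads.connectors.abstract_connector import Mapping

    m = Mapping('published_date')
    assert m.remote_field == 'published_date'   # defaults to the local field name
    assert m.formatter('1999') == '1999'        # default formatter passes values through

    m = Mapping('oclc_number', remote_field='oclc_numbers', formatter=lambda a: a[0])
    assert m.formatter(['1085969138']) == '1085969138'
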
| @@ -6,7 +6,7 @@ from django.core.files.base import ContentFile | ||||
| import requests | ||||
|  | ||||
| from fedireads import models | ||||
| from .abstract_connector import AbstractConnector, SearchResult | ||||
| from .abstract_connector import AbstractConnector, SearchResult, Mapping | ||||
| from .abstract_connector import update_from_mappings, get_date, get_data | ||||
|  | ||||
|  | ||||
| @@ -14,11 +14,37 @@ class Connector(AbstractConnector): | ||||
|     ''' interact with other instances ''' | ||||
|     def __init__(self, identifier): | ||||
|         super().__init__(identifier) | ||||
|         self.book_mappings = self.key_mappings.copy() | ||||
|         self.book_mappings.update({ | ||||
|             'published_date': ('published_date', get_date), | ||||
|             'first_published_date': ('first_published_date', get_date), | ||||
|         }) | ||||
|         self.key_mappings = [ | ||||
|             Mapping('isbn_13', model=models.Edition), | ||||
|             Mapping('isbn_10', model=models.Edition), | ||||
|             Mapping('lccn', model=models.Work), | ||||
|             Mapping('oclc_number', model=models.Edition), | ||||
|             Mapping('openlibrary_key'), | ||||
|             Mapping('goodreads_key'), | ||||
|             Mapping('asin'), | ||||
|         ] | ||||
|  | ||||
|         self.book_mappings = self.key_mappings + [ | ||||
|             Mapping('sort_title'), | ||||
|             Mapping('subtitle'), | ||||
|             Mapping('description'), | ||||
|             Mapping('languages'), | ||||
|             Mapping('series'), | ||||
|             Mapping('series_number'), | ||||
|             Mapping('subjects'), | ||||
|             Mapping('subject_places'), | ||||
|             Mapping('first_published_date'), | ||||
|             Mapping('published_date'), | ||||
|             Mapping('pages'), | ||||
|             Mapping('physical_format'), | ||||
|             Mapping('publishers'), | ||||
|         ] | ||||
|  | ||||
|         self.author_mappings = [ | ||||
|             Mapping('born', remote_field='birth_date', formatter=get_date), | ||||
|             Mapping('died', remote_field='death_date', formatter=get_date), | ||||
|             Mapping('bio'), | ||||
|         ] | ||||
|  | ||||
|  | ||||
|     def is_work_data(self, data): | ||||
| @@ -63,11 +89,7 @@ class Connector(AbstractConnector): | ||||
|  | ||||
|         # ingest a new author | ||||
|         author = models.Author(remote_id=remote_id) | ||||
|         mappings = { | ||||
|             'born': ('born', get_date), | ||||
|             'died': ('died', get_date), | ||||
|         } | ||||
|         author = update_from_mappings(author, data, mappings) | ||||
|         author = update_from_mappings(author, data, self.author_mappings) | ||||
|         author.save() | ||||
|  | ||||
|         return author | ||||
|   | ||||
| @@ -5,7 +5,7 @@ import requests | ||||
| from django.core.files.base import ContentFile | ||||
|  | ||||
| from fedireads import models | ||||
| from .abstract_connector import AbstractConnector, SearchResult | ||||
| from .abstract_connector import AbstractConnector, SearchResult, Mapping | ||||
| from .abstract_connector import update_from_mappings | ||||
| from .abstract_connector import get_date, get_data | ||||
| from .openlibrary_languages import languages | ||||
| @@ -15,23 +15,61 @@ class Connector(AbstractConnector): | ||||
|     ''' instantiate a connector for OL ''' | ||||
|     def __init__(self, identifier): | ||||
|         super().__init__(identifier) | ||||
|         get_first = lambda a: a[0] | ||||
|         self.key_mappings = { | ||||
|             'isbn_13': ('isbn_13', get_first), | ||||
|             'isbn_10': ('isbn_10', get_first), | ||||
|             'oclc_numbers': ('oclc_number', get_first), | ||||
|             'lccn': ('lccn', get_first), | ||||
|         } | ||||
|  | ||||
|         self.book_mappings = self.key_mappings.copy() | ||||
|         self.book_mappings.update({ | ||||
|             'publish_date': ('published_date', get_date), | ||||
|             'first_publish_date': ('first_published_date', get_date), | ||||
|             'description': ('description', get_description), | ||||
|             'languages': ('languages', get_languages), | ||||
|             'number_of_pages': ('pages', None), | ||||
|             'series': ('series', get_first), | ||||
|         }) | ||||
|         get_first = lambda a: a[0] | ||||
|         self.key_mappings = [ | ||||
|             Mapping('isbn_13', model=models.Edition, formatter=get_first), | ||||
|             Mapping('isbn_10', model=models.Edition, formatter=get_first), | ||||
|             Mapping('lccn', model=models.Work, formatter=get_first), | ||||
|             Mapping( | ||||
|                 'oclc_number', | ||||
|                 remote_field='oclc_numbers', | ||||
|                 model=models.Edition, | ||||
|                 formatter=get_first | ||||
|             ), | ||||
|             Mapping( | ||||
|                 'openlibrary_key', | ||||
|                 remote_field='key', | ||||
|                 formatter=get_openlibrary_key | ||||
|             ), | ||||
|             Mapping('goodreads_key'), | ||||
|             Mapping('asin'), | ||||
|         ] | ||||
|  | ||||
|         self.book_mappings = self.key_mappings + [ | ||||
|             Mapping('sort_title'), | ||||
|             Mapping('subtitle'), | ||||
|             Mapping('description', formatter=get_description), | ||||
|             Mapping('languages', formatter=get_languages), | ||||
|             Mapping('series', formatter=get_first), | ||||
|             Mapping('series_number'), | ||||
|             Mapping('subjects'), | ||||
|             Mapping('subject_places'), | ||||
|             Mapping( | ||||
|                 'first_published_date', | ||||
|                 remote_field='first_publish_date', | ||||
|                 formatter=get_date | ||||
|             ), | ||||
|             Mapping( | ||||
|                 'published_date', | ||||
|                 remote_field='publish_date', | ||||
|                 formatter=get_date | ||||
|             ), | ||||
|             Mapping( | ||||
|                 'pages', | ||||
|                 model=models.Edition, | ||||
|                 remote_field='number_of_pages' | ||||
|             ), | ||||
|             Mapping('physical_format', model=models.Edition), | ||||
|             Mapping('publishers'), | ||||
|         ] | ||||
|  | ||||
|         self.author_mappings = [ | ||||
|             Mapping('born', remote_field='birth_date', formatter=get_date), | ||||
|             Mapping('died', remote_field='death_date', formatter=get_date), | ||||
|             Mapping('bio', formatter=get_description), | ||||
|         ] | ||||
|  | ||||
|  | ||||
|  | ||||
|     def is_work_data(self, data): | ||||
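
A small worked example (not from the commit) of how two of the remote_field/formatter pairs above reshape a raw OpenLibrary record: get_first mirrors the lambda defined in this __init__, get_openlibrary_key mirrors the helper added near the bottom of this file, and the OCLC value is made up.

    get_first = lambda a: a[0]
    get_openlibrary_key = lambda key: key.split('/')[-1]

    raw = {'key': '/books/OL27320736M', 'oclc_numbers': ['1085969138']}

    # 'key' in the remote data feeds the local openlibrary_key field
    print(get_openlibrary_key(raw['key']))    # OL27320736M
    # 'oclc_numbers' (a list) feeds the scalar oclc_number field
    print(get_first(raw['oclc_numbers']))     # 1085969138
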
| @@ -133,12 +171,7 @@ class Connector(AbstractConnector): | ||||
|         data = get_data(url) | ||||
|  | ||||
|         author = models.Author(openlibrary_key=olkey) | ||||
|         mappings = { | ||||
|             'birth_date': ('born', get_date), | ||||
|             'death_date': ('died', get_date), | ||||
|             'bio': ('bio', get_description), | ||||
|         } | ||||
|         author = update_from_mappings(author, data, mappings) | ||||
|         author = update_from_mappings(author, data, self.author_mappings) | ||||
|         name = data.get('name') | ||||
|         # TODO this is making some BOLD assumption | ||||
|         if name: | ||||
| @@ -156,6 +189,11 @@ def get_description(description_blob): | ||||
|     return  description_blob | ||||
|  | ||||
|  | ||||
| def get_openlibrary_key(key): | ||||
|     ''' convert /books/OL27320736M into OL27320736M ''' | ||||
|     return key.split('/')[-1] | ||||
|  | ||||
|  | ||||
| def get_languages(language_blob): | ||||
|     ''' /language/eng -> English ''' | ||||
|     langs = [] | ||||
|   | ||||
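
The diff only shows the top of get_languages, so for orientation here is a minimal sketch of the '/languages/eng' -> 'English' lookup it describes. It assumes the imported languages module maps OpenLibrary language keys to display names; the real implementation may differ, and the table below is a stand-in.

    # hypothetical lookup table standing in for fedireads.connectors.openlibrary_languages
    languages = {'/languages/eng': 'English'}

    def get_languages(language_blob):
        ''' /language/eng -> English '''
        langs = []
        for lang in language_blob:
            # look up each OpenLibrary key, keeping None for unknown languages
            langs.append(languages.get(lang.get('key'), None))
        return langs

    print(get_languages([{'key': '/languages/eng'}]))  # ['English']
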
fedireads/migrations/0039_auto_20200510_2342.py (new file, 21 lines)
							| @@ -0,0 +1,21 @@ | ||||
| # Generated by Django 3.0.3 on 2020-05-10 23:42 | ||||
|  | ||||
| from django.db import migrations | ||||
|  | ||||
|  | ||||
| class Migration(migrations.Migration): | ||||
|  | ||||
|     dependencies = [ | ||||
|         ('fedireads', '0038_author_remote_id'), | ||||
|     ] | ||||
|  | ||||
|     operations = [ | ||||
|         migrations.RemoveField( | ||||
|             model_name='book', | ||||
|             name='misc_identifiers', | ||||
|         ), | ||||
|         migrations.RemoveField( | ||||
|             model_name='connector', | ||||
|             name='key_name', | ||||
|         ), | ||||
|     ] | ||||
| @@ -29,8 +29,6 @@ class Connector(FedireadsModel): | ||||
|     covers_url = models.CharField(max_length=255) | ||||
|     search_url = models.CharField(max_length=255, null=True) | ||||
|  | ||||
|     key_name = models.CharField(max_length=255) | ||||
|  | ||||
|     politeness_delay = models.IntegerField(null=True) #seconds | ||||
|     max_query_count = models.IntegerField(null=True) | ||||
|     # how many queries executed in a unit of time, like a day | ||||
| @@ -54,7 +52,6 @@ class Book(FedireadsModel): | ||||
|     openlibrary_key = models.CharField(max_length=255, blank=True, null=True) | ||||
|     librarything_key = models.CharField(max_length=255, blank=True, null=True) | ||||
|     goodreads_key = models.CharField(max_length=255, blank=True, null=True) | ||||
|     misc_identifiers = JSONField(null=True) | ||||
|  | ||||
|     # info about where the data comes from and where/if to sync | ||||
|     sync = models.BooleanField(default=True) | ||||
|   | ||||
| @@ -18,7 +18,6 @@ class FedireadsConnector(TestCase): | ||||
|             books_url='https://example.com', | ||||
|             covers_url='https://example.com/images/covers', | ||||
|             search_url='https://example.com/search?q=', | ||||
|             key_name='remote_id', | ||||
|         ) | ||||
|         self.connector = Connector('example.com') | ||||
|  | ||||
|   | ||||
| @@ -8,7 +8,7 @@ import pytz | ||||
| from fedireads import models | ||||
| from fedireads.connectors.openlibrary import Connector | ||||
| from fedireads.connectors.openlibrary import get_languages, get_description | ||||
| from fedireads.connectors.openlibrary import pick_default_edition | ||||
| from fedireads.connectors.openlibrary import pick_default_edition, get_openlibrary_key | ||||
| from fedireads.connectors.abstract_connector import SearchResult, get_date | ||||
|  | ||||
|  | ||||
| @@ -22,7 +22,6 @@ class Openlibrary(TestCase): | ||||
|             books_url='https://openlibrary.org', | ||||
|             covers_url='https://covers.openlibrary.org', | ||||
|             search_url='https://openlibrary.org/search?q=', | ||||
|             key_name='openlibrary_key', | ||||
|         ) | ||||
|         self.connector = Connector('openlibrary.org') | ||||
|  | ||||
| @@ -77,3 +76,9 @@ class Openlibrary(TestCase): | ||||
|     def test_get_languages(self): | ||||
|         languages = get_languages(self.edition_data['languages']) | ||||
|         self.assertEqual(languages, ['English']) | ||||
|  | ||||
|  | ||||
|     def test_get_ol_key(self): | ||||
|         key = get_openlibrary_key('/books/OL27320736M') | ||||
|         self.assertEqual(key, 'OL27320736M') | ||||
|  | ||||
|   | ||||