def match_from_mappings(data, mappings):
    ''' try to find existing copies of this book using various keys

    data: dict of book data as loaded from a remote source
    mappings: dict of {remote_key: (local_field, formatter)}; formatter may
        be None. fields without an entry are read from data under the
        local field name, unformatted.
    returns the first matching model instance, or None if nothing matches '''
    # identifiers are tried in this order, against the model that holds them
    lookup_fields = [
        ('openlibrary_key', models.Book),
        ('librarything_key', models.Book),
        ('goodreads_key', models.Book),
        ('lccn', models.Work),
        ('isbn_10', models.Edition),
        ('isbn_13', models.Edition),
        ('oclc_number', models.Edition),
        ('asin', models.Edition),
    ]
    # mappings are keyed by the remote name, but we iterate over local
    # field names -- invert once so renamed keys (for example
    # 'oclc_numbers' -> 'oclc_number') are actually found
    by_field = {
        field: (remote_key, formatter)
        for remote_key, (field, formatter) in mappings.items()
    }

    for field, model in lookup_fields:
        remote_key, formatter = by_field.get(field, (field, None))

        value = data.get(remote_key)
        if not value:
            # missing or empty in the remote data, nothing to match on
            continue
        if formatter:
            value = formatter(value)

        match = model.objects.select_subclasses().filter(
            **{field: value}).first()
        if match:
            return match
    return None
def get_or_create_book(self, remote_id):
    ''' pull up a book record by whatever means possible

    looks for a local book with this remote_id first. if there is none,
    a new Book is started and handed to update_book to be filled in.
    a Work is swapped for its default_edition before being returned. '''
    book = models.Book.objects.select_subclasses().filter(
        remote_id=remote_id
    ).first()
    if book:
        if isinstance(book, models.Work):
            return book.default_edition
        return book

    # no book was found, so we start creating a new one
    book = models.Book(remote_id=remote_id)
    # update_book returns an already-known record when the remote data
    # matches one; its other return paths aren't visible from here, so
    # fall back to the book we just built rather than returning None
    # NOTE(review): confirm update_book saves the new book before returning
    return self.update_book(book) or book
def format_search_result(self, doc):
    ''' build a SearchResult from one entry of the search response '''
    # the key is a slash-separated path; only the last segment is kept
    olkey = doc['key'].rsplit('/', 1)[-1]

    title = doc.get('title')

    # fall back to a placeholder when no author names are given
    authors = doc.get('author_name')
    if not authors:
        authors = ['Unknown']
    first_author = authors[0]

    year = doc.get('first_publish_year')
    return SearchResult(title, olkey, first_author, year)
def format_search_result(self, book):
    ''' wrap a local book record in a SearchResult '''
    title = book.title
    key = book.id
    author = book.author_text

    # the year is only available when a publication date is set
    if book.published_date:
        year = book.published_date.year
    else:
        year = None

    return SearchResult(title, key, author, year)