Expand matching books on keys like isbn

This commit is contained in:
Mouse Reeve 2020-05-03 21:00:25 -07:00
parent 3c3afed6b3
commit 07aab3806b
4 changed files with 109 additions and 68 deletions

View File

@ -2,6 +2,7 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dateutil import parser from dateutil import parser
import pytz import pytz
import requests
from fedireads import models from fedireads import models
@ -33,10 +34,27 @@ class AbstractConnector(ABC):
return True return True
@abstractmethod
def search(self, query): def search(self, query):
''' free text search ''' ''' free text search '''
# return list of search result objs resp = requests.get(
'%s%s' % (self.search_url, query),
headers={
'Accept': 'application/json; charset=utf-8',
},
)
if not resp.ok:
resp.raise_for_status()
data = resp.json()
results = []
for doc in data['docs'][:10]:
results.append(self.format_search_result(doc))
return results
@abstractmethod
def format_search_result(self, search_result):
''' create a SearchResult obj from json '''
@abstractmethod @abstractmethod
@ -82,6 +100,37 @@ def update_from_mappings(obj, data, mappings):
return obj return obj
def match_from_mappings(data, mappings):
    ''' try to find existing copies of this book using various keys

    data: dict of raw book data from a connector
    mappings: dict of {remote_key: (local_field, formatter)}; formatter may
        be None, in which case the raw value is used unchanged

    returns the first matching model instance, or None if nothing matches
    '''
    # identifier fields in priority order, with the model each one lives on
    keys = [
        ('openlibrary_key', models.Book),
        ('librarything_key', models.Book),
        ('goodreads_key', models.Book),
        ('lccn', models.Work),
        ('isbn_10', models.Edition),
        ('isbn_13', models.Edition),
        ('oclc_number', models.Edition),
        ('asin', models.Edition),
    ]

    # mappings is keyed by the *remote* name, but we search by *local* field,
    # so index it the other way round. Without this, a renamed field such as
    # 'oclc_numbers' -> 'oclc_number' is looked up under the wrong name in
    # data and never matches.
    by_local_field = {}
    for remote_key, (local_field, formatter) in mappings.items():
        by_local_field.setdefault(local_field, (remote_key, formatter))

    for field, model in keys:
        # fields without a mapping are read from data under their own name
        remote_key, formatter = by_local_field.get(field, (field, None))

        value = data.get(remote_key)
        if not value:
            continue
        if formatter:
            value = formatter(value)
        if not value:
            # the formatter produced nothing usable to search on
            continue

        match = model.objects.select_subclasses().filter(
            **{field: value}).first()
        if match:
            return match
    return None
def has_attr(obj, key): def has_attr(obj, key):
''' helper function to check if a model object has a key ''' ''' helper function to check if a model object has a key '''
try: try:
@ -100,12 +149,11 @@ def get_date(date_string):
class SearchResult: class SearchResult:
''' standardized search result object ''' ''' standardized search result object '''
def __init__(self, title, key, author, year, raw_data): def __init__(self, title, key, author, year):
self.title = title self.title = title
self.key = key self.key = key
self.author = author self.author = author
self.year = year self.year = year
self.raw_data = raw_data
def __repr__(self): def __repr__(self):
return "<SearchResult key={!r} title={!r} author={!r}>".format( return "<SearchResult key={!r} title={!r} author={!r}>".format(

View File

@ -4,48 +4,37 @@ from django.core.files.base import ContentFile
import requests import requests
from fedireads import models from fedireads import models
from .abstract_connector import AbstractConnector from .abstract_connector import AbstractConnector, SearchResult, get_date
from .abstract_connector import update_from_mappings, get_date from .abstract_connector import match_from_mappings, update_from_mappings
class Connector(AbstractConnector): class Connector(AbstractConnector):
''' interact with other instances ''' ''' interact with other instances '''
def search(self, query): def format_search_result(self, search_result):
''' right now you can't search fedireads, but... ''' return SearchResult(**search_result)
resp = requests.get(
'%s%s' % (self.search_url, query),
headers={
'Accept': 'application/activity+json; charset=utf-8',
},
)
if not resp.ok:
resp.raise_for_status()
return resp.json()
def get_or_create_book(self, remote_id): def get_or_create_book(self, remote_id):
''' pull up a book record by whatever means possible ''' ''' pull up a book record by whatever means possible '''
try: book = models.Book.objects.select_subclasses().filter(
book = models.Book.objects.select_subclasses().get( remote_id=remote_id
remote_id=remote_id ).first()
) if book:
if isinstance(book, models.Work):
return book.default_edition
return book return book
except ObjectDoesNotExist:
if self.model.is_self: # no book was found, so we start creating a new one
# we can't load a book from a remote server, this is it book = models.Book(remote_id=remote_id)
return None self.update_book(book)
# no book was found, so we start creating a new one
book = models.Book(remote_id=remote_id)
def update_book(self, book, data=None): def update_book(self, book, data=None):
''' add remote data to a local book ''' ''' add remote data to a local book '''
remote_id = book.remote_id
if not data: if not data:
response = requests.get( response = requests.get(
'%s/%s' % (self.base_url, remote_id), book.remote_id,
headers={ headers={
'Accept': 'application/activity+json; charset=utf-8', 'Accept': 'application/activity+json; charset=utf-8',
}, },
@ -55,6 +44,10 @@ class Connector(AbstractConnector):
data = response.json() data = response.json()
match = match_from_mappings(data, {})
if match:
return match
# great, we can update our book. # great, we can update our book.
mappings = { mappings = {
'published_date': ('published_date', get_date), 'published_date': ('published_date', get_date),

View File

@ -7,7 +7,8 @@ from django.db import transaction
from fedireads import models from fedireads import models
from .abstract_connector import AbstractConnector, SearchResult from .abstract_connector import AbstractConnector, SearchResult
from .abstract_connector import update_from_mappings, get_date from .abstract_connector import match_from_mappings, update_from_mappings
from .abstract_connector import get_date
from .openlibrary_languages import languages from .openlibrary_languages import languages
@ -15,45 +16,34 @@ class Connector(AbstractConnector):
''' instantiate a connector for OL ''' ''' instantiate a connector for OL '''
def __init__(self, identifier): def __init__(self, identifier):
get_first = lambda a: a[0] get_first = lambda a: a[0]
self.book_mappings = { self.key_mappings = {
'publish_date': ('published_date', get_date),
'first_publish_date': ('first_published_date', get_date),
'description': ('description', get_description),
'isbn_13': ('isbn_13', get_first), 'isbn_13': ('isbn_13', get_first),
'oclc_numbers': ('oclc_number', get_first), 'oclc_numbers': ('oclc_number', get_first),
'lccn': ('lccn', get_first), 'lccn': ('lccn', get_first),
}
self.book_mappings = self.key_mappings.copy()
self.book_mappings.update({
'publish_date': ('published_date', get_date),
'first_publish_date': ('first_published_date', get_date),
'description': ('description', get_description),
'languages': ('languages', get_languages), 'languages': ('languages', get_languages),
'number_of_pages': ('pages', None), 'number_of_pages': ('pages', None),
'series': ('series', get_first), 'series': ('series', get_first),
} })
super().__init__(identifier) super().__init__(identifier)
def search(self, query): def format_search_result(self, doc):
''' query openlibrary search ''' key = doc['key']
resp = requests.get( key = key.split('/')[-1]
'%s%s' % (self.search_url, query), author = doc.get('author_name') or ['Unknown']
headers={ return SearchResult(
'Accept': 'application/json; charset=utf-8', doc.get('title'),
}, key,
author[0],
doc.get('first_publish_year'),
) )
if not resp.ok:
resp.raise_for_status()
data = resp.json()
results = []
for doc in data['docs'][:5]:
key = doc['key']
key = key.split('/')[-1]
author = doc.get('author_name') or ['Unknown']
results.append(SearchResult(
doc.get('title'),
key,
author[0],
doc.get('first_publish_year'),
doc
))
return results
def get_or_create_book(self, olkey): def get_or_create_book(self, olkey):
@ -115,6 +105,11 @@ class Connector(AbstractConnector):
def create_book(self, key, data, model): def create_book(self, key, data, model):
''' create a work or edition from data ''' ''' create a work or edition from data '''
# we really would rather use an existing book than make a new one
match = match_from_mappings(data, self.key_mappings)
if match:
return match
book = model.objects.create( book = model.objects.create(
openlibrary_key=key, openlibrary_key=key,
title=data['title'], title=data['title'],
@ -145,7 +140,9 @@ class Connector(AbstractConnector):
if not book.sync and not book.sync_cover: if not book.sync and not book.sync_cover:
return return
data = self.load_book_data(book.openlibrary_key) if not data:
data = self.load_book_data(book.openlibrary_key)
if book.sync_cover and data.get('covers'): if book.sync_cover and data.get('covers'):
book.cover.save(*self.get_cover(data['covers'][0]), save=True) book.cover.save(*self.get_cover(data['covers'][0]), save=True)
if book.sync: if book.sync:

View File

@ -34,17 +34,20 @@ class Connector(AbstractConnector):
search_results = [] search_results = []
for book in results[:10]: for book in results[:10]:
search_results.append( search_results.append(
SearchResult( self.format_search_result(book)
book.title,
book.id,
book.author_text,
book.published_date.year if book.published_date else None,
None
)
) )
return search_results return search_results
def format_search_result(self, book):
    ''' turn a local book record into a SearchResult '''
    title, key, author = book.title, book.id, book.author_text
    # books without a publication date get no year
    published = book.published_date
    year = published.year if published else None
    return SearchResult(title, key, author, year)
def get_or_create_book(self, book_id): def get_or_create_book(self, book_id):
''' since this is querying its own data source, it can only ''' since this is querying its own data source, it can only
get a book, not load one from an external source ''' get a book, not load one from an external source '''