isbn search

Henri Bourcereau
2021-03-01 21:09:21 +01:00
parent 79c3ae1c44
commit 703ff60271
18 changed files with 358 additions and 4 deletions

View File

@@ -26,6 +26,7 @@ class AbstractMinimalConnector(ABC):
'books_url',
'covers_url',
'search_url',
'isbn_search_url',
'max_query_count',
'name',
'identifier',
@@ -61,6 +62,30 @@ class AbstractMinimalConnector(ABC):
results.append(self.format_search_result(doc))
return results
def isbn_search(self, query):
''' search the remote source by isbn '''
params = {}
resp = requests.get(
'%s%s' % (self.isbn_search_url, query),
params=params,
headers={
'Accept': 'application/json; charset=utf-8',
'User-Agent': settings.USER_AGENT,
},
)
if not resp.ok:
resp.raise_for_status()
try:
data = resp.json()
except ValueError as e:
logger.exception(e)
raise ConnectorException('Unable to parse json response', e)
results = []
for doc in self.parse_isbn_search_data(data):
results.append(self.format_isbn_search_result(doc))
return results
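
Note that the request URL here is built by plain string concatenation, so each connector's isbn_search_url is expected to carry the full path and any query-string prefix, with only the bare ISBN appended. A tiny illustration (the URL value is made up, not taken from this diff):

# illustrative prefix; the real value comes from the connector's stored isbn_search_url
isbn_search_url = 'https://example.org/search.json?isbn='
query = '9783161484100'
url = '%s%s' % (isbn_search_url, query)
# -> 'https://example.org/search.json?isbn=9783161484100'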
@abstractmethod
def get_or_create_book(self, remote_id):
''' pull up a book record by whatever means possible '''
@@ -73,6 +98,14 @@ class AbstractMinimalConnector(ABC):
def format_search_result(self, search_result):
''' create a SearchResult obj from json '''
@abstractmethod
def parse_isbn_search_data(self, data):
''' turn the result json from an isbn search into a list '''
@abstractmethod
def format_isbn_search_result(self, search_result):
''' create a SearchResult obj from json '''
class AbstractConnector(AbstractMinimalConnector):
''' generic book data connector '''
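
A connector built on AbstractMinimalConnector now has to supply the two new abstract methods alongside the existing search hooks. A minimal sketch of what a subclass looks like (ExampleConnector and the assumed payload shapes are illustrative, not part of this commit):

# import path as in the bookwyrm source tree
from bookwyrm.connectors.abstract_connector import AbstractMinimalConnector, SearchResult

class ExampleConnector(AbstractMinimalConnector):
    ''' hypothetical connector showing where the new isbn hooks fit '''
    def get_or_create_book(self, remote_id):
        return None

    def parse_search_data(self, data):
        # assumption: the remote search endpoint wraps its results in a 'docs' list
        return data.get('docs', [])

    def format_search_result(self, search_result):
        search_result['connector'] = self
        return SearchResult(**search_result)

    def parse_isbn_search_data(self, data):
        # assumption: the isbn endpoint uses the same 'docs' list shape
        return data.get('docs', [])

    def format_isbn_search_result(self, search_result):
        search_result['connector'] = self
        return SearchResult(**search_result)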

View File

@@ -19,3 +19,11 @@ class Connector(AbstractMinimalConnector):
def format_search_result(self, search_result):
search_result['connector'] = self
return SearchResult(**search_result)
def parse_isbn_search_data(self, data):
return data
def format_isbn_search_result(self, search_result):
search_result['connector'] = self
return SearchResult(**search_result)

View File

@@ -1,5 +1,6 @@
''' interface with whatever connectors the app has '''
import importlib
import re
from urllib.parse import urlparse
from requests import HTTPError
@@ -15,13 +16,31 @@ class ConnectorException(HTTPError):
def search(query, min_confidence=0.1):
''' find books based on arbitrary keywords '''
results = []
# Have we got an ISBN?
isbn = re.sub(r'[\W_]', '', query)
maybe_isbn = len(isbn) in [10, 13]  # ISBN-10 or ISBN-13
dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year)
result_index = set()
for connector in get_connectors():
try:
result_set = connector.search(query, min_confidence=min_confidence)
except (HTTPError, ConnectorException):
continue
result_set = None
if maybe_isbn:
# Search on ISBN
if not connector.isbn_search_url:
result_set = []
else:
try:
result_set = connector.isbn_search(isbn)
except (HTTPError, ConnectorException):
pass
# if the isbn lookup produced nothing, fall back to generic keyword search
if not result_set:
try:
result_set = connector.search(query, min_confidence=min_confidence)
except (HTTPError, ConnectorException):
continue
result_set = [r for r in result_set \
if dedup_slug(r) not in result_index]
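
The ISBN heuristic above is deliberately loose: it only strips separator characters and checks the remaining length, and a false positive is harmless because the code falls back to keyword search whenever the ISBN lookup comes back empty. A standalone illustration of what the check classifies (not part of the commit):

import re

def looks_like_isbn(query):
    # mirrors the normalisation in search(): drop everything that isn't a letter or digit
    isbn = re.sub(r'[\W_]', '', query)
    return isbn, len(isbn) in [10, 13]

looks_like_isbn('978-3-16-148410-0')     # ('9783161484100', True)  -> try the isbn endpoints first
looks_like_isbn('0-19-852663-6')         # ('0198526636', True)
looks_like_isbn('parable of the sower')  # ('parableofthesower', False) -> keyword search only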
@@ -41,6 +60,12 @@ def local_search(query, min_confidence=0.1, raw=False):
return connector.search(query, min_confidence=min_confidence, raw=raw)
def isbn_local_search(query, raw=False):
''' only look at local search results '''
connector = load_connector(models.Connector.objects.get(local=True))
return connector.isbn_search(query, raw=raw)
def first_search_result(query, min_confidence=0.1):
''' search until you find a result that fits '''
for connector in get_connectors():

View File

@@ -129,6 +129,22 @@ class Connector(AbstractConnector):
)
def parse_isbn_search_data(self, data):
return list(data.values())
def format_isbn_search_result(self, search_result):
# build the remote id from the openlibrary key
key = self.books_url + search_result['key']
authors = search_result.get('authors') or [{'name': 'Unknown'}]
author_names = [author.get('name') for author in authors]
return SearchResult(
title=search_result.get('title'),
key=key,
author=', '.join(author_names),
connector=self,
year=search_result.get('publish_date'),
)
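
parse_isbn_search_data returning list(data.values()) implies that the OpenLibrary ISBN endpoint answers with a mapping keyed by the queried identifier, rather than the 'docs' list the regular search endpoint returns. The actual payload isn't shown in this diff; a rough sketch of the assumed shape:

# assumed response shape for an isbn lookup, keyed by bibkey (values are illustrative)
data = {
    'ISBN:9780980200447': {
        'key': '/books/OL22853304M',
        'title': 'Slow reading',
        'authors': [{'name': 'John Miedema'}],
        'publish_date': 'March 2009',
    },
}
docs = list(data.values())  # what parse_isbn_search_data hands to format_isbn_search_result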
def load_edition_data(self, olkey):
''' query openlibrary for editions of a work '''
url = '%s/works/%s/editions' % (self.books_url, olkey)

View File

@@ -33,6 +33,31 @@ class Connector(AbstractConnector):
search_results.sort(key=lambda r: r.confidence, reverse=True)
return search_results
def isbn_search(self, query, raw=False):
''' search the local database by isbn '''
if not query:
return []
filters = [{f: query} for f in ['isbn_10', 'isbn_13']]
results = models.Edition.objects.filter(
reduce(operator.or_, (Q(**f) for f in filters))
).distinct()
# when there are multiple editions of the same work, pick the default.
# it would be odd for this to happen.
results = results.filter(parent_work__default_edition__id=F('id')) \
or results
search_results = []
for result in results:
if raw:
search_results.append(result)
else:
search_results.append(self.format_search_result(result))
if len(search_results) >= 10:
break
return search_results
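
The reduce/operator.or_ construction just ORs one Q filter per ISBN field; written out by hand it is equivalent to the query below (a sketch, assuming the isbn_10 and isbn_13 fields on Edition used above):

from django.db.models import Q
from bookwyrm import models

isbn = '9783161484100'  # illustrative value
editions = models.Edition.objects.filter(
    Q(isbn_10=isbn) | Q(isbn_13=isbn)
).distinct()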
def format_search_result(self, search_result):
return SearchResult(
@@ -47,6 +72,19 @@ class Connector(AbstractConnector):
)
def format_isbn_search_result(self, search_result):
return SearchResult(
title=search_result.title,
key=search_result.remote_id,
author=search_result.author_text,
year=search_result.published_date.year if \
search_result.published_date else None,
connector=self,
confidence=search_result.rank if \
hasattr(search_result, 'rank') else 1,
)
def is_work_data(self, data):
pass
@@ -59,6 +97,10 @@ class Connector(AbstractConnector):
def get_authors_from_data(self, data):
return None
def parse_isbn_search_data(self, data):
''' it's already in the right format, don't even worry about it '''
return data
def parse_search_data(self, data):
''' it's already in the right format, don't even worry about it '''
return data
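
Taken together, callers don't need to know whether a query is an ISBN: search() detects it, tries the ISBN endpoint of every connector that defines isbn_search_url, and falls back to the ordinary keyword search otherwise. A usage sketch (query strings are illustrative; the module path is assumed from the BookWyrm tree):

# assumption: this module is importable as bookwyrm.connectors.connector_manager
from bookwyrm.connectors import connector_manager

# reads as an ISBN-13 once hyphens are stripped, so the isbn branch runs first
results = connector_manager.search('978-3-16-148410-0')

# a plain keyword query skips the isbn branch entirely
results = connector_manager.search('the fifth season')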