Stop assuming every book is Hamlet

This commit is contained in:
Mouse Reeve
2020-10-29 15:29:23 -07:00
parent a46d7f5dc7
commit 7ce0890a41
7 changed files with 37 additions and 32 deletions

View File

@ -1,8 +1,8 @@
''' functionality outline for a book data connector '''
from abc import ABC, abstractmethod
from dataclasses import dataclass
from dateutil import parser
import pytz
from urllib3.exceptions import ProtocolError
import requests
from requests import HTTPError
@ -52,7 +52,7 @@ class AbstractConnector(ABC):
return True
def search(self, query):
def search(self, query, min_confidence=None):
''' free text search '''
resp = requests.get(
'%s%s' % (self.search_url, query),
@ -160,7 +160,7 @@ class AbstractConnector(ABC):
author_text = []
for author in self.get_authors_from_data(data):
book.authors.add(author)
author_text += author.display_name
author_text.append(author.display_name)
book.author_text = ', '.join(author_text)
book.save()
@ -298,7 +298,7 @@ def get_data(url):
'Accept': 'application/json; charset=utf-8',
},
)
except ProtocolError:
except ConnectionError:
raise ConnectorException()
if not resp.ok:
resp.raise_for_status()
@ -306,13 +306,14 @@ def get_data(url):
return data
@dataclass
class SearchResult:
''' standardized search result object '''
def __init__(self, title, key, author, year):
self.title = title
self.key = key
self.author = author
self.year = year
title: str
key: str
author: str
year: str
confidence: int = 1
def __repr__(self):
return "<SearchResult key={!r} title={!r} author={!r}>".format(

View File

@ -129,10 +129,10 @@ class Connector(AbstractConnector):
key = self.books_url + search_result['key']
author = search_result.get('author_name') or ['Unknown']
return SearchResult(
search_result.get('title'),
key,
', '.join(author),
search_result.get('first_publish_year'),
title=search_result.get('title'),
key=key,
author=', '.join(author),
year=search_result.get('first_publish_year'),
)

View File

@ -7,7 +7,7 @@ from .abstract_connector import AbstractConnector, SearchResult
class Connector(AbstractConnector):
''' instantiate a connector '''
def search(self, query):
def search(self, query, min_confidence=0.1):
''' right now you can't search bookwyrm sorry, but when
that gets implemented it will totally rule '''
vector = SearchVector('title', weight='A') +\
@ -28,7 +28,7 @@ class Connector(AbstractConnector):
).annotate(
rank=SearchRank(vector, query)
).filter(
rank__gt=0
rank__gt=min_confidence
).order_by('-rank')
results = results.filter(default=True) or results
@ -42,11 +42,12 @@ class Connector(AbstractConnector):
def format_search_result(self, search_result):
return SearchResult(
search_result.title,
search_result.local_id,
search_result.author_text,
search_result.published_date.year if \
title=search_result.title,
key=search_result.local_id,
author=search_result.author_text,
year=search_result.published_date.year if \
search_result.published_date else None,
confidence=search_result.rank,
)