Merge branch 'main' into inventaire

Mouse Reeve
2021-04-26 14:22:05 -07:00
280 changed files with 20693 additions and 9991 deletions

bookwyrm/connectors/abstract_connector.py

@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
class AbstractMinimalConnector(ABC):
-""" just the bare bones, for other bookwyrm instances """
+"""just the bare bones, for other bookwyrm instances"""
def __init__(self, identifier):
# load connector settings
@@ -39,7 +39,7 @@ class AbstractMinimalConnector(ABC):
setattr(self, field, getattr(info, field))
def search(self, query, min_confidence=None):
-""" free text search """
+"""free text search"""
params = {}
if min_confidence:
params["min_confidence"] = min_confidence
@@ -55,7 +55,7 @@ class AbstractMinimalConnector(ABC):
return results
def isbn_search(self, query):
-""" isbn search """
+"""isbn search"""
params = {}
data = self.get_search_data(
"%s%s" % (self.isbn_search_url, query),
@@ -74,27 +74,27 @@ class AbstractMinimalConnector(ABC):
@abstractmethod
def get_or_create_book(self, remote_id):
-""" pull up a book record by whatever means possible """
+"""pull up a book record by whatever means possible"""
@abstractmethod
def parse_search_data(self, data):
-""" turn the result json from a search into a list """
+"""turn the result json from a search into a list"""
@abstractmethod
def format_search_result(self, search_result):
-""" create a SearchResult obj from json """
+"""create a SearchResult obj from json"""
@abstractmethod
def parse_isbn_search_data(self, data):
-""" turn the result json from a search into a list """
+"""turn the result json from a search into a list"""
@abstractmethod
def format_isbn_search_result(self, search_result):
-""" create a SearchResult obj from json """
+"""create a SearchResult obj from json"""
class AbstractConnector(AbstractMinimalConnector):
-""" generic book data connector """
+"""generic book data connector"""
def __init__(self, identifier):
super().__init__(identifier)
@@ -103,14 +103,14 @@ class AbstractConnector(AbstractMinimalConnector):
self.book_mappings = []
def is_available(self):
-""" check if you're allowed to use this connector """
+"""check if you're allowed to use this connector"""
if self.max_query_count is not None:
if self.connector.query_count >= self.max_query_count:
return False
return True
def get_or_create_book(self, remote_id):
-""" translate arbitrary json into an Activitypub dataclass """
+"""translate arbitrary json into an Activitypub dataclass"""
# first, check if we have the origin_id saved
existing = models.Edition.find_existing_by_remote_id(
remote_id
@@ -159,7 +159,7 @@ class AbstractConnector(AbstractMinimalConnector):
return get_data(remote_id)
def create_edition_from_data(self, work, edition_data):
-""" if we already have the work, we're ready """
+"""if we already have the work, we're ready"""
mapped_data = dict_from_mappings(edition_data, self.book_mappings)
mapped_data["work"] = work.remote_id
edition_activity = activitypub.Edition(**mapped_data)
@@ -179,7 +179,7 @@ class AbstractConnector(AbstractMinimalConnector):
return edition
def get_or_create_author(self, remote_id):
-""" load that author """
+"""load that author"""
existing = models.Author.find_existing_by_remote_id(remote_id)
if existing:
return existing
@@ -187,29 +187,33 @@ class AbstractConnector(AbstractMinimalConnector):
data = self.get_book_data(remote_id)
mapped_data = dict_from_mappings(data, self.author_mappings)
-activity = activitypub.Author(**mapped_data)
+try:
+activity = activitypub.Author(**mapped_data)
+except activitypub.ActivitySerializerError:
+return None
# this will dedupe
return activity.to_model(model=models.Author)
@abstractmethod
def is_work_data(self, data):
-""" differentiate works and editions """
+"""differentiate works and editions"""
@abstractmethod
def get_edition_from_work_data(self, data):
-""" every work needs at least one edition """
+"""every work needs at least one edition"""
@abstractmethod
def get_work_from_edition_data(self, data):
-""" every edition needs a work """
+"""every edition needs a work"""
@abstractmethod
def get_authors_from_data(self, data):
-""" load author data """
+"""load author data"""
@abstractmethod
def expand_book_data(self, book):
-""" get more info on a book """
+"""get more info on a book"""
def dict_from_mappings(data, mappings):
@@ -222,7 +226,13 @@ def dict_from_mappings(data, mappings):
def get_data(url, params=None):
-""" wrapper for request.get """
+"""wrapper for request.get"""
+# check if the url is blocked
+if models.FederatedServer.is_blocked(url):
+raise ConnectorException(
+"Attempting to load data from blocked url: {:s}".format(url)
+)
try:
resp = requests.get(
url,
@@ -248,7 +258,7 @@ def get_data(url, params=None):
def get_image(url):
-""" wrapper for requesting an image """
+"""wrapper for requesting an image"""
try:
resp = requests.get(
url,
@@ -266,7 +276,7 @@ def get_image(url):
@dataclass
class SearchResult:
-""" standardized search result object """
+"""standardized search result object"""
title: str
key: str
@@ -283,14 +293,14 @@ class SearchResult:
)
def json(self):
-""" serialize a connector for json response """
+"""serialize a connector for json response"""
serialized = asdict(self)
del serialized["connector"]
return serialized
class Mapping:
-""" associate a local database field with a field in an external dataset """
+"""associate a local database field with a field in an external dataset"""
def __init__(self, local_field, remote_field=None, formatter=None):
noop = lambda x: x
@@ -300,7 +310,7 @@ class Mapping:
self.formatter = formatter or noop
def get_value(self, data):
-""" pull a field from incoming json and return the formatted version """
+"""pull a field from incoming json and return the formatted version"""
value = data.get(self.remote_field)
if not value:
return None

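The most substantive change in abstract_connector.py above is the guard added to get_data(), which refuses to fetch from blocked servers before any request goes out. A minimal standalone sketch of that pattern; the blocklist and is_blocked helper here are toys standing in for the models.FederatedServer lookup, not bookwyrm's actual API:

from urllib.parse import urlparse

# toy blocklist standing in for the FederatedServer table
BLOCKED_DOMAINS = {"blocked.example"}


class ConnectorException(Exception):
    """when the connector can't do what was asked"""


def is_blocked(url):
    """stand-in for models.FederatedServer.is_blocked"""
    return urlparse(url).netloc in BLOCKED_DOMAINS


def get_data(url, params=None):
    """mirrors the guard in the hunk above; the real function goes on to call requests.get"""
    if is_blocked(url):
        raise ConnectorException(
            "Attempting to load data from blocked url: {:s}".format(url)
        )
    return {}  # placeholder for the fetched and parsed json


get_data("https://openlibrary.org/works/OL27448W.json")  # allowed
try:
    get_data("https://blocked.example/book/1")
except ConnectorException as err:
    print(err)  # Attempting to load data from blocked url: ...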
bookwyrm/connectors/bookwyrm_connector.py

@@ -4,7 +4,7 @@ from .abstract_connector import AbstractMinimalConnector, SearchResult
class Connector(AbstractMinimalConnector):
-""" this is basically just for search """
+"""this is basically just for search"""
def get_or_create_book(self, remote_id):
edition = activitypub.resolve_remote_id(remote_id, model=models.Edition)

bookwyrm/connectors/connector_manager.py

@@ -1,5 +1,6 @@
""" interface with whatever connectors the app has """
import importlib
+import logging
import re
from urllib.parse import urlparse
@@ -11,13 +12,15 @@ from requests import HTTPError
from bookwyrm import models
from bookwyrm.tasks import app
+logger = logging.getLogger(__name__)
class ConnectorException(HTTPError):
-""" when the connector can't do what was asked """
+"""when the connector can't do what was asked"""
def search(query, min_confidence=0.1):
-""" find books based on arbitary keywords """
+"""find books based on arbitary keywords"""
if not query:
return []
results = []
@@ -37,14 +40,17 @@ def search(query, min_confidence=0.1):
else:
try:
result_set = connector.isbn_search(isbn)
-except (HTTPError, ConnectorException):
-pass
+except Exception as e:  # pylint: disable=broad-except
+logger.exception(e)
+continue
# if no isbn search or results, we fallback to generic search
if result_set in (None, []):
try:
result_set = connector.search(query, min_confidence=min_confidence)
-except (HTTPError, ConnectorException):
+except Exception as e:  # pylint: disable=broad-except
+# we don't want *any* error to crash the whole search page
+logger.exception(e)
continue
# if the search results look the same, ignore them
@@ -61,20 +67,22 @@ def search(query, min_confidence=0.1):
return results
-def local_search(query, min_confidence=0.1, raw=False):
-""" only look at local search results """
+def local_search(query, min_confidence=0.1, raw=False, filters=None):
+"""only look at local search results"""
connector = load_connector(models.Connector.objects.get(local=True))
-return connector.search(query, min_confidence=min_confidence, raw=raw)
+return connector.search(
+query, min_confidence=min_confidence, raw=raw, filters=filters
+)
def isbn_local_search(query, raw=False):
-""" only look at local search results """
+"""only look at local search results"""
connector = load_connector(models.Connector.objects.get(local=True))
return connector.isbn_search(query, raw=raw)
def first_search_result(query, min_confidence=0.1):
-""" search until you find a result that fits """
+"""search until you find a result that fits"""
for connector in get_connectors():
result = connector.search(query, min_confidence=min_confidence)
if result:
@@ -83,13 +91,13 @@ def first_search_result(query, min_confidence=0.1):
def get_connectors():
-""" load all connectors """
+"""load all connectors"""
for info in models.Connector.objects.order_by("priority").all():
yield load_connector(info)
def get_or_create_connector(remote_id):
-""" get the connector related to the object's server """
+"""get the connector related to the object's server"""
url = urlparse(remote_id)
identifier = url.netloc
if not identifier:
@@ -113,7 +121,7 @@ def get_or_create_connector(remote_id):
@app.task
def load_more_data(connector_id, book_id):
-""" background the work of getting all 10,000 editions of LoTR """
+"""background the work of getting all 10,000 editions of LoTR"""
connector_info = models.Connector.objects.get(id=connector_id)
connector = load_connector(connector_info)
book = models.Book.objects.select_subclasses().get(id=book_id)
@@ -121,7 +129,7 @@ def load_more_data(connector_id, book_id):
def load_connector(connector_info):
-""" instantiate the connector class """
+"""instantiate the connector class"""
connector = importlib.import_module(
"bookwyrm.connectors.%s" % connector_info.connector_file
)
@@ -131,6 +139,6 @@ def load_connector(connector_info):
@receiver(signals.post_save, sender="bookwyrm.FederatedServer")
# pylint: disable=unused-argument
def create_connector(sender, instance, created, *args, **kwargs):
-""" create a connector to an external bookwyrm server """
+"""create a connector to an external bookwyrm server"""
if instance.application_type == "bookwyrm":
get_or_create_connector("https://{:s}".format(instance.server_name))

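Condensed for illustration, the new exception handling in search() above boils down to this pattern: per connector, try the ISBN search, fall back to free-text search, and log-and-skip any failure so one broken connector cannot take down the whole results page. The connector classes below are toys, not bookwyrm's:

import logging

logger = logging.getLogger(__name__)


def search_all(connectors, query, isbn=None, min_confidence=0.1):
    """try isbn search first, fall back to free-text search, per connector"""
    results = []
    for connector in connectors:
        result_set = None
        if isbn:
            try:
                result_set = connector.isbn_search(isbn)
            except Exception as err:  # pylint: disable=broad-except
                logger.exception(err)
                continue
        # if no isbn search or results, fall back to generic search
        if result_set in (None, []):
            try:
                result_set = connector.search(query, min_confidence=min_confidence)
            except Exception as err:  # pylint: disable=broad-except
                # one broken connector shouldn't crash the whole search page
                logger.exception(err)
                continue
        results.extend(result_set)
    return results


class FlakyConnector:
    def search(self, query, min_confidence=0.1):
        raise RuntimeError("remote API is down")


class GoodConnector:
    def search(self, query, min_confidence=0.1):
        return ["The Fifth Season"]


assert search_all([FlakyConnector(), GoodConnector()], "fifth season") == [
    "The Fifth Season"
]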
bookwyrm/connectors/openlibrary.py

@@ -9,7 +9,7 @@ from .openlibrary_languages import languages
class Connector(AbstractConnector):
-""" instantiate a connector for OL """
+"""instantiate a connector for OL"""
def __init__(self, identifier):
super().__init__(identifier)
@@ -59,7 +59,7 @@ class Connector(AbstractConnector):
]
def get_remote_id_from_data(self, data):
-""" format a url from an openlibrary id field """
+"""format a url from an openlibrary id field"""
try:
key = data["key"]
except KeyError:
@@ -87,16 +87,19 @@ class Connector(AbstractConnector):
return get_data(url)
def get_authors_from_data(self, data):
-""" parse author json and load or create authors """
+"""parse author json and load or create authors"""
for author_blob in data.get("authors", []):
author_blob = author_blob.get("author", author_blob)
# this id is "/authors/OL1234567A"
author_id = author_blob["key"]
url = "%s%s" % (self.base_url, author_id)
-yield self.get_or_create_author(url)
+author = self.get_or_create_author(url)
+if not author:
+continue
+yield author
def get_cover_url(self, cover_blob, size="L"):
-""" ask openlibrary for the cover """
+"""ask openlibrary for the cover"""
if not cover_blob:
return None
cover_id = cover_blob[0]
@@ -138,7 +141,7 @@ class Connector(AbstractConnector):
)
def load_edition_data(self, olkey):
-""" query openlibrary for editions of a work """
+"""query openlibrary for editions of a work"""
url = "%s/works/%s/editions" % (self.books_url, olkey)
return get_data(url)
@@ -163,7 +166,7 @@ class Connector(AbstractConnector):
def ignore_edition(edition_data):
-""" don't load a million editions that have no metadata """
+"""don't load a million editions that have no metadata"""
# an isbn, we love to see it
if edition_data.get("isbn_13") or edition_data.get("isbn_10"):
return False
@@ -182,19 +185,19 @@ def ignore_edition(edition_data):
def get_description(description_blob):
-""" descriptions can be a string or a dict """
+"""descriptions can be a string or a dict"""
if isinstance(description_blob, dict):
return description_blob.get("value")
return description_blob
def get_openlibrary_key(key):
-""" convert /books/OL27320736M into OL27320736M """
+"""convert /books/OL27320736M into OL27320736M"""
return key.split("/")[-1]
def get_languages(language_blob):
-""" /language/eng -> English """
+"""/language/eng -> English"""
langs = []
for lang in language_blob:
langs.append(languages.get(lang.get("key", ""), None))
@@ -202,7 +205,7 @@ def get_languages(language_blob):
def pick_default_edition(options):
-""" favor physical copies with covers in english """
+"""favor physical copies with covers in english"""
if not options:
return None
if len(options) == 1:

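The change to get_authors_from_data() above pairs with the earlier abstract_connector.py hunk: get_or_create_author() can now return None for an author that failed to deserialize, so the generator skips those instead of yielding None to callers. A toy version of that guard, with a dict lookup standing in for the real get_or_create_author:

def get_authors(author_ids, get_or_create_author):
    for author_id in author_ids:
        author = get_or_create_author(author_id)
        if not author:
            # unresolvable author: skip it rather than yield None
            continue
        yield author


lookup = {"OL1234567A": "N.K. Jemisin"}.get
assert list(get_authors(["OL1234567A", "OL0000000A"], lookup)) == ["N.K. Jemisin"]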
bookwyrm/connectors/self_connector.py

@@ -10,18 +10,19 @@ from .abstract_connector import AbstractConnector, SearchResult
class Connector(AbstractConnector):
-""" instantiate a connector """
+"""instantiate a connector"""
# pylint: disable=arguments-differ
-def search(self, query, min_confidence=0.1, raw=False):
-""" search your local database """
+def search(self, query, min_confidence=0.1, raw=False, filters=None):
+"""search your local database"""
+filters = filters or []
if not query:
return []
# first, try searching unqiue identifiers
-results = search_identifiers(query)
+results = search_identifiers(query, *filters)
if not results:
# then try searching title/author
-results = search_title_author(query, min_confidence)
+results = search_title_author(query, min_confidence, *filters)
search_results = []
for result in results:
if raw:
@@ -35,7 +36,7 @@ class Connector(AbstractConnector):
return search_results
def isbn_search(self, query, raw=False):
-""" search your local database """
+"""search your local database"""
if not query:
return []
@@ -87,26 +88,26 @@ class Connector(AbstractConnector):
return None
def parse_isbn_search_data(self, data):
-""" it's already in the right format, don't even worry about it """
+"""it's already in the right format, don't even worry about it"""
return data
def parse_search_data(self, data):
-""" it's already in the right format, don't even worry about it """
+"""it's already in the right format, don't even worry about it"""
return data
def expand_book_data(self, book):
pass
-def search_identifiers(query):
-""" tries remote_id, isbn; defined as dedupe fields on the model """
-filters = [
+def search_identifiers(query, *filters):
+"""tries remote_id, isbn; defined as dedupe fields on the model"""
+or_filters = [
{f.name: query}
for f in models.Edition._meta.get_fields()
if hasattr(f, "deduplication_field") and f.deduplication_field
]
results = models.Edition.objects.filter(
-reduce(operator.or_, (Q(**f) for f in filters))
+*filters, reduce(operator.or_, (Q(**f) for f in or_filters))
).distinct()
# when there are multiple editions of the same work, pick the default.
@@ -114,8 +115,8 @@ def search_identifiers(query):
return results.filter(parent_work__default_edition__id=F("id")) or results
-def search_title_author(query, min_confidence):
-""" searches for title and author """
+def search_title_author(query, min_confidence, *filters):
+"""searches for title and author"""
vector = (
SearchVector("title", weight="A")
+ SearchVector("subtitle", weight="B")
@@ -126,7 +127,7 @@ def search_title_author(query, min_confidence):
results = (
models.Edition.objects.annotate(search=vector)
.annotate(rank=SearchRank(vector, query))
-.filter(rank__gt=min_confidence)
+.filter(*filters, rank__gt=min_confidence)
.order_by("-rank")
)
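The core idiom in search_identifiers() above is folding a list of per-field lookups into one OR expression with functools.reduce, while any extra positional filters passed down from search() are AND-ed in by Django's .filter(). In bookwyrm the operands are Q objects; the fold works the same on plain predicates, as in this dependency-free sketch with made-up records:

import operator
from functools import reduce

records = [
    {"remote_id": "https://example.net/book/1", "isbn_13": "9780441569595"},
    {"remote_id": "https://example.net/book/2", "isbn_13": "9780765387561"},
]
query = "9780765387561"

# one lookup per dedupe field, OR-ed together like Q(...) | Q(...) | ...
or_filters = [
    lambda record, field=f: record.get(field) == query
    for f in ("remote_id", "isbn_13")
]

matches = [
    record
    for record in records
    if reduce(operator.or_, (check(record) for check in or_filters))
]
assert matches == [records[1]]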