Merge branch 'main' into inventaire
This commit is contained in:
@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AbstractMinimalConnector(ABC):
|
||||
""" just the bare bones, for other bookwyrm instances """
|
||||
"""just the bare bones, for other bookwyrm instances"""
|
||||
|
||||
def __init__(self, identifier):
|
||||
# load connector settings
|
||||
@ -39,7 +39,7 @@ class AbstractMinimalConnector(ABC):
|
||||
setattr(self, field, getattr(info, field))
|
||||
|
||||
def search(self, query, min_confidence=None):
|
||||
""" free text search """
|
||||
"""free text search"""
|
||||
params = {}
|
||||
if min_confidence:
|
||||
params["min_confidence"] = min_confidence
|
||||
@ -55,7 +55,7 @@ class AbstractMinimalConnector(ABC):
|
||||
return results
|
||||
|
||||
def isbn_search(self, query):
|
||||
""" isbn search """
|
||||
"""isbn search"""
|
||||
params = {}
|
||||
data = self.get_search_data(
|
||||
"%s%s" % (self.isbn_search_url, query),
|
||||
@ -74,27 +74,27 @@ class AbstractMinimalConnector(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def get_or_create_book(self, remote_id):
|
||||
""" pull up a book record by whatever means possible """
|
||||
"""pull up a book record by whatever means possible"""
|
||||
|
||||
@abstractmethod
|
||||
def parse_search_data(self, data):
|
||||
""" turn the result json from a search into a list """
|
||||
"""turn the result json from a search into a list"""
|
||||
|
||||
@abstractmethod
|
||||
def format_search_result(self, search_result):
|
||||
""" create a SearchResult obj from json """
|
||||
"""create a SearchResult obj from json"""
|
||||
|
||||
@abstractmethod
|
||||
def parse_isbn_search_data(self, data):
|
||||
""" turn the result json from a search into a list """
|
||||
"""turn the result json from a search into a list"""
|
||||
|
||||
@abstractmethod
|
||||
def format_isbn_search_result(self, search_result):
|
||||
""" create a SearchResult obj from json """
|
||||
"""create a SearchResult obj from json"""
|
||||
|
||||
|
||||
class AbstractConnector(AbstractMinimalConnector):
|
||||
""" generic book data connector """
|
||||
"""generic book data connector"""
|
||||
|
||||
def __init__(self, identifier):
|
||||
super().__init__(identifier)
|
||||
@ -103,14 +103,14 @@ class AbstractConnector(AbstractMinimalConnector):
|
||||
self.book_mappings = []
|
||||
|
||||
def is_available(self):
|
||||
""" check if you're allowed to use this connector """
|
||||
"""check if you're allowed to use this connector"""
|
||||
if self.max_query_count is not None:
|
||||
if self.connector.query_count >= self.max_query_count:
|
||||
return False
|
||||
return True
|
||||
|
||||
def get_or_create_book(self, remote_id):
|
||||
""" translate arbitrary json into an Activitypub dataclass """
|
||||
"""translate arbitrary json into an Activitypub dataclass"""
|
||||
# first, check if we have the origin_id saved
|
||||
existing = models.Edition.find_existing_by_remote_id(
|
||||
remote_id
|
||||
@ -159,7 +159,7 @@ class AbstractConnector(AbstractMinimalConnector):
|
||||
return get_data(remote_id)
|
||||
|
||||
def create_edition_from_data(self, work, edition_data):
|
||||
""" if we already have the work, we're ready """
|
||||
"""if we already have the work, we're ready"""
|
||||
mapped_data = dict_from_mappings(edition_data, self.book_mappings)
|
||||
mapped_data["work"] = work.remote_id
|
||||
edition_activity = activitypub.Edition(**mapped_data)
|
||||
@ -179,7 +179,7 @@ class AbstractConnector(AbstractMinimalConnector):
|
||||
return edition
|
||||
|
||||
def get_or_create_author(self, remote_id):
|
||||
""" load that author """
|
||||
"""load that author"""
|
||||
existing = models.Author.find_existing_by_remote_id(remote_id)
|
||||
if existing:
|
||||
return existing
|
||||
@ -187,29 +187,33 @@ class AbstractConnector(AbstractMinimalConnector):
|
||||
data = self.get_book_data(remote_id)
|
||||
|
||||
mapped_data = dict_from_mappings(data, self.author_mappings)
|
||||
activity = activitypub.Author(**mapped_data)
|
||||
try:
|
||||
activity = activitypub.Author(**mapped_data)
|
||||
except activitypub.ActivitySerializerError:
|
||||
return None
|
||||
|
||||
# this will dedupe
|
||||
return activity.to_model(model=models.Author)
|
||||
|
||||
@abstractmethod
|
||||
def is_work_data(self, data):
|
||||
""" differentiate works and editions """
|
||||
"""differentiate works and editions"""
|
||||
|
||||
@abstractmethod
|
||||
def get_edition_from_work_data(self, data):
|
||||
""" every work needs at least one edition """
|
||||
"""every work needs at least one edition"""
|
||||
|
||||
@abstractmethod
|
||||
def get_work_from_edition_data(self, data):
|
||||
""" every edition needs a work """
|
||||
"""every edition needs a work"""
|
||||
|
||||
@abstractmethod
|
||||
def get_authors_from_data(self, data):
|
||||
""" load author data """
|
||||
"""load author data"""
|
||||
|
||||
@abstractmethod
|
||||
def expand_book_data(self, book):
|
||||
""" get more info on a book """
|
||||
"""get more info on a book"""
|
||||
|
||||
|
||||
def dict_from_mappings(data, mappings):
|
||||
@ -222,7 +226,13 @@ def dict_from_mappings(data, mappings):
|
||||
|
||||
|
||||
def get_data(url, params=None):
|
||||
""" wrapper for request.get """
|
||||
"""wrapper for request.get"""
|
||||
# check if the url is blocked
|
||||
if models.FederatedServer.is_blocked(url):
|
||||
raise ConnectorException(
|
||||
"Attempting to load data from blocked url: {:s}".format(url)
|
||||
)
|
||||
|
||||
try:
|
||||
resp = requests.get(
|
||||
url,
|
||||
@ -248,7 +258,7 @@ def get_data(url, params=None):
|
||||
|
||||
|
||||
def get_image(url):
|
||||
""" wrapper for requesting an image """
|
||||
"""wrapper for requesting an image"""
|
||||
try:
|
||||
resp = requests.get(
|
||||
url,
|
||||
@ -266,7 +276,7 @@ def get_image(url):
|
||||
|
||||
@dataclass
|
||||
class SearchResult:
|
||||
""" standardized search result object """
|
||||
"""standardized search result object"""
|
||||
|
||||
title: str
|
||||
key: str
|
||||
@ -283,14 +293,14 @@ class SearchResult:
|
||||
)
|
||||
|
||||
def json(self):
|
||||
""" serialize a connector for json response """
|
||||
"""serialize a connector for json response"""
|
||||
serialized = asdict(self)
|
||||
del serialized["connector"]
|
||||
return serialized
|
||||
|
||||
|
||||
class Mapping:
|
||||
""" associate a local database field with a field in an external dataset """
|
||||
"""associate a local database field with a field in an external dataset"""
|
||||
|
||||
def __init__(self, local_field, remote_field=None, formatter=None):
|
||||
noop = lambda x: x
|
||||
@ -300,7 +310,7 @@ class Mapping:
|
||||
self.formatter = formatter or noop
|
||||
|
||||
def get_value(self, data):
|
||||
""" pull a field from incoming json and return the formatted version """
|
||||
"""pull a field from incoming json and return the formatted version"""
|
||||
value = data.get(self.remote_field)
|
||||
if not value:
|
||||
return None
|
||||
|
@ -4,7 +4,7 @@ from .abstract_connector import AbstractMinimalConnector, SearchResult
|
||||
|
||||
|
||||
class Connector(AbstractMinimalConnector):
|
||||
""" this is basically just for search """
|
||||
"""this is basically just for search"""
|
||||
|
||||
def get_or_create_book(self, remote_id):
|
||||
edition = activitypub.resolve_remote_id(remote_id, model=models.Edition)
|
||||
|
@ -1,5 +1,6 @@
|
||||
""" interface with whatever connectors the app has """
|
||||
import importlib
|
||||
import logging
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
|
||||
@ -11,13 +12,15 @@ from requests import HTTPError
|
||||
from bookwyrm import models
|
||||
from bookwyrm.tasks import app
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ConnectorException(HTTPError):
|
||||
""" when the connector can't do what was asked """
|
||||
"""when the connector can't do what was asked"""
|
||||
|
||||
|
||||
def search(query, min_confidence=0.1):
|
||||
""" find books based on arbitary keywords """
|
||||
"""find books based on arbitary keywords"""
|
||||
if not query:
|
||||
return []
|
||||
results = []
|
||||
@ -37,14 +40,17 @@ def search(query, min_confidence=0.1):
|
||||
else:
|
||||
try:
|
||||
result_set = connector.isbn_search(isbn)
|
||||
except (HTTPError, ConnectorException):
|
||||
pass
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
logger.exception(e)
|
||||
continue
|
||||
|
||||
# if no isbn search or results, we fallback to generic search
|
||||
if result_set in (None, []):
|
||||
try:
|
||||
result_set = connector.search(query, min_confidence=min_confidence)
|
||||
except (HTTPError, ConnectorException):
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
# we don't want *any* error to crash the whole search page
|
||||
logger.exception(e)
|
||||
continue
|
||||
|
||||
# if the search results look the same, ignore them
|
||||
@ -61,20 +67,22 @@ def search(query, min_confidence=0.1):
|
||||
return results
|
||||
|
||||
|
||||
def local_search(query, min_confidence=0.1, raw=False):
|
||||
""" only look at local search results """
|
||||
def local_search(query, min_confidence=0.1, raw=False, filters=None):
|
||||
"""only look at local search results"""
|
||||
connector = load_connector(models.Connector.objects.get(local=True))
|
||||
return connector.search(query, min_confidence=min_confidence, raw=raw)
|
||||
return connector.search(
|
||||
query, min_confidence=min_confidence, raw=raw, filters=filters
|
||||
)
|
||||
|
||||
|
||||
def isbn_local_search(query, raw=False):
|
||||
""" only look at local search results """
|
||||
"""only look at local search results"""
|
||||
connector = load_connector(models.Connector.objects.get(local=True))
|
||||
return connector.isbn_search(query, raw=raw)
|
||||
|
||||
|
||||
def first_search_result(query, min_confidence=0.1):
|
||||
""" search until you find a result that fits """
|
||||
"""search until you find a result that fits"""
|
||||
for connector in get_connectors():
|
||||
result = connector.search(query, min_confidence=min_confidence)
|
||||
if result:
|
||||
@ -83,13 +91,13 @@ def first_search_result(query, min_confidence=0.1):
|
||||
|
||||
|
||||
def get_connectors():
|
||||
""" load all connectors """
|
||||
"""load all connectors"""
|
||||
for info in models.Connector.objects.order_by("priority").all():
|
||||
yield load_connector(info)
|
||||
|
||||
|
||||
def get_or_create_connector(remote_id):
|
||||
""" get the connector related to the object's server """
|
||||
"""get the connector related to the object's server"""
|
||||
url = urlparse(remote_id)
|
||||
identifier = url.netloc
|
||||
if not identifier:
|
||||
@ -113,7 +121,7 @@ def get_or_create_connector(remote_id):
|
||||
|
||||
@app.task
|
||||
def load_more_data(connector_id, book_id):
|
||||
""" background the work of getting all 10,000 editions of LoTR """
|
||||
"""background the work of getting all 10,000 editions of LoTR"""
|
||||
connector_info = models.Connector.objects.get(id=connector_id)
|
||||
connector = load_connector(connector_info)
|
||||
book = models.Book.objects.select_subclasses().get(id=book_id)
|
||||
@ -121,7 +129,7 @@ def load_more_data(connector_id, book_id):
|
||||
|
||||
|
||||
def load_connector(connector_info):
|
||||
""" instantiate the connector class """
|
||||
"""instantiate the connector class"""
|
||||
connector = importlib.import_module(
|
||||
"bookwyrm.connectors.%s" % connector_info.connector_file
|
||||
)
|
||||
@ -131,6 +139,6 @@ def load_connector(connector_info):
|
||||
@receiver(signals.post_save, sender="bookwyrm.FederatedServer")
|
||||
# pylint: disable=unused-argument
|
||||
def create_connector(sender, instance, created, *args, **kwargs):
|
||||
""" create a connector to an external bookwyrm server """
|
||||
"""create a connector to an external bookwyrm server"""
|
||||
if instance.application_type == "bookwyrm":
|
||||
get_or_create_connector("https://{:s}".format(instance.server_name))
|
||||
|
@ -9,7 +9,7 @@ from .openlibrary_languages import languages
|
||||
|
||||
|
||||
class Connector(AbstractConnector):
|
||||
""" instantiate a connector for OL """
|
||||
"""instantiate a connector for OL"""
|
||||
|
||||
def __init__(self, identifier):
|
||||
super().__init__(identifier)
|
||||
@ -59,7 +59,7 @@ class Connector(AbstractConnector):
|
||||
]
|
||||
|
||||
def get_remote_id_from_data(self, data):
|
||||
""" format a url from an openlibrary id field """
|
||||
"""format a url from an openlibrary id field"""
|
||||
try:
|
||||
key = data["key"]
|
||||
except KeyError:
|
||||
@ -87,16 +87,19 @@ class Connector(AbstractConnector):
|
||||
return get_data(url)
|
||||
|
||||
def get_authors_from_data(self, data):
|
||||
""" parse author json and load or create authors """
|
||||
"""parse author json and load or create authors"""
|
||||
for author_blob in data.get("authors", []):
|
||||
author_blob = author_blob.get("author", author_blob)
|
||||
# this id is "/authors/OL1234567A"
|
||||
author_id = author_blob["key"]
|
||||
url = "%s%s" % (self.base_url, author_id)
|
||||
yield self.get_or_create_author(url)
|
||||
author = self.get_or_create_author(url)
|
||||
if not author:
|
||||
continue
|
||||
yield author
|
||||
|
||||
def get_cover_url(self, cover_blob, size="L"):
|
||||
""" ask openlibrary for the cover """
|
||||
"""ask openlibrary for the cover"""
|
||||
if not cover_blob:
|
||||
return None
|
||||
cover_id = cover_blob[0]
|
||||
@ -138,7 +141,7 @@ class Connector(AbstractConnector):
|
||||
)
|
||||
|
||||
def load_edition_data(self, olkey):
|
||||
""" query openlibrary for editions of a work """
|
||||
"""query openlibrary for editions of a work"""
|
||||
url = "%s/works/%s/editions" % (self.books_url, olkey)
|
||||
return get_data(url)
|
||||
|
||||
@ -163,7 +166,7 @@ class Connector(AbstractConnector):
|
||||
|
||||
|
||||
def ignore_edition(edition_data):
|
||||
""" don't load a million editions that have no metadata """
|
||||
"""don't load a million editions that have no metadata"""
|
||||
# an isbn, we love to see it
|
||||
if edition_data.get("isbn_13") or edition_data.get("isbn_10"):
|
||||
return False
|
||||
@ -182,19 +185,19 @@ def ignore_edition(edition_data):
|
||||
|
||||
|
||||
def get_description(description_blob):
|
||||
""" descriptions can be a string or a dict """
|
||||
"""descriptions can be a string or a dict"""
|
||||
if isinstance(description_blob, dict):
|
||||
return description_blob.get("value")
|
||||
return description_blob
|
||||
|
||||
|
||||
def get_openlibrary_key(key):
|
||||
""" convert /books/OL27320736M into OL27320736M """
|
||||
"""convert /books/OL27320736M into OL27320736M"""
|
||||
return key.split("/")[-1]
|
||||
|
||||
|
||||
def get_languages(language_blob):
|
||||
""" /language/eng -> English """
|
||||
"""/language/eng -> English"""
|
||||
langs = []
|
||||
for lang in language_blob:
|
||||
langs.append(languages.get(lang.get("key", ""), None))
|
||||
@ -202,7 +205,7 @@ def get_languages(language_blob):
|
||||
|
||||
|
||||
def pick_default_edition(options):
|
||||
""" favor physical copies with covers in english """
|
||||
"""favor physical copies with covers in english"""
|
||||
if not options:
|
||||
return None
|
||||
if len(options) == 1:
|
||||
|
@ -10,18 +10,19 @@ from .abstract_connector import AbstractConnector, SearchResult
|
||||
|
||||
|
||||
class Connector(AbstractConnector):
|
||||
""" instantiate a connector """
|
||||
"""instantiate a connector"""
|
||||
|
||||
# pylint: disable=arguments-differ
|
||||
def search(self, query, min_confidence=0.1, raw=False):
|
||||
""" search your local database """
|
||||
def search(self, query, min_confidence=0.1, raw=False, filters=None):
|
||||
"""search your local database"""
|
||||
filters = filters or []
|
||||
if not query:
|
||||
return []
|
||||
# first, try searching unqiue identifiers
|
||||
results = search_identifiers(query)
|
||||
results = search_identifiers(query, *filters)
|
||||
if not results:
|
||||
# then try searching title/author
|
||||
results = search_title_author(query, min_confidence)
|
||||
results = search_title_author(query, min_confidence, *filters)
|
||||
search_results = []
|
||||
for result in results:
|
||||
if raw:
|
||||
@ -35,7 +36,7 @@ class Connector(AbstractConnector):
|
||||
return search_results
|
||||
|
||||
def isbn_search(self, query, raw=False):
|
||||
""" search your local database """
|
||||
"""search your local database"""
|
||||
if not query:
|
||||
return []
|
||||
|
||||
@ -87,26 +88,26 @@ class Connector(AbstractConnector):
|
||||
return None
|
||||
|
||||
def parse_isbn_search_data(self, data):
|
||||
""" it's already in the right format, don't even worry about it """
|
||||
"""it's already in the right format, don't even worry about it"""
|
||||
return data
|
||||
|
||||
def parse_search_data(self, data):
|
||||
""" it's already in the right format, don't even worry about it """
|
||||
"""it's already in the right format, don't even worry about it"""
|
||||
return data
|
||||
|
||||
def expand_book_data(self, book):
|
||||
pass
|
||||
|
||||
|
||||
def search_identifiers(query):
|
||||
""" tries remote_id, isbn; defined as dedupe fields on the model """
|
||||
filters = [
|
||||
def search_identifiers(query, *filters):
|
||||
"""tries remote_id, isbn; defined as dedupe fields on the model"""
|
||||
or_filters = [
|
||||
{f.name: query}
|
||||
for f in models.Edition._meta.get_fields()
|
||||
if hasattr(f, "deduplication_field") and f.deduplication_field
|
||||
]
|
||||
results = models.Edition.objects.filter(
|
||||
reduce(operator.or_, (Q(**f) for f in filters))
|
||||
*filters, reduce(operator.or_, (Q(**f) for f in or_filters))
|
||||
).distinct()
|
||||
|
||||
# when there are multiple editions of the same work, pick the default.
|
||||
@ -114,8 +115,8 @@ def search_identifiers(query):
|
||||
return results.filter(parent_work__default_edition__id=F("id")) or results
|
||||
|
||||
|
||||
def search_title_author(query, min_confidence):
|
||||
""" searches for title and author """
|
||||
def search_title_author(query, min_confidence, *filters):
|
||||
"""searches for title and author"""
|
||||
vector = (
|
||||
SearchVector("title", weight="A")
|
||||
+ SearchVector("subtitle", weight="B")
|
||||
@ -126,7 +127,7 @@ def search_title_author(query, min_confidence):
|
||||
results = (
|
||||
models.Edition.objects.annotate(search=vector)
|
||||
.annotate(rank=SearchRank(vector, query))
|
||||
.filter(rank__gt=min_confidence)
|
||||
.filter(*filters, rank__gt=min_confidence)
|
||||
.order_by("-rank")
|
||||
)
|
||||
|
||||
|
Reference in New Issue
Block a user