Merge branch 'main' into inventaire

Mouse Reeve
2021-04-26 14:22:05 -07:00
280 changed files with 20693 additions and 9991 deletions

bookwyrm/connectors/abstract_connector.py

@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
class AbstractMinimalConnector(ABC):
-""" just the bare bones, for other bookwyrm instances """
+"""just the bare bones, for other bookwyrm instances"""
def __init__(self, identifier):
# load connector settings
@@ -39,7 +39,7 @@ class AbstractMinimalConnector(ABC):
setattr(self, field, getattr(info, field))
def search(self, query, min_confidence=None):
-""" free text search """
+"""free text search"""
params = {}
if min_confidence:
params["min_confidence"] = min_confidence
@@ -55,7 +55,7 @@ class AbstractMinimalConnector(ABC):
return results
def isbn_search(self, query):
-""" isbn search """
+"""isbn search"""
params = {}
data = self.get_search_data(
"%s%s" % (self.isbn_search_url, query),
@@ -74,27 +74,27 @@ class AbstractMinimalConnector(ABC):
@abstractmethod
def get_or_create_book(self, remote_id):
-""" pull up a book record by whatever means possible """
+"""pull up a book record by whatever means possible"""
@abstractmethod
def parse_search_data(self, data):
-""" turn the result json from a search into a list """
+"""turn the result json from a search into a list"""
@abstractmethod
def format_search_result(self, search_result):
-""" create a SearchResult obj from json """
+"""create a SearchResult obj from json"""
@abstractmethod
def parse_isbn_search_data(self, data):
-""" turn the result json from a search into a list """
+"""turn the result json from a search into a list"""
@abstractmethod
def format_isbn_search_result(self, search_result):
-""" create a SearchResult obj from json """
+"""create a SearchResult obj from json"""
class AbstractConnector(AbstractMinimalConnector):
-""" generic book data connector """
+"""generic book data connector"""
def __init__(self, identifier):
super().__init__(identifier)
@@ -103,14 +103,14 @@ class AbstractConnector(AbstractMinimalConnector):
self.book_mappings = []
def is_available(self):
-""" check if you're allowed to use this connector """
+"""check if you're allowed to use this connector"""
if self.max_query_count is not None:
if self.connector.query_count >= self.max_query_count:
return False
return True
def get_or_create_book(self, remote_id):
-""" translate arbitrary json into an Activitypub dataclass """
+"""translate arbitrary json into an Activitypub dataclass"""
# first, check if we have the origin_id saved
existing = models.Edition.find_existing_by_remote_id(
remote_id
@@ -159,7 +159,7 @@ class AbstractConnector(AbstractMinimalConnector):
return get_data(remote_id)
def create_edition_from_data(self, work, edition_data):
-""" if we already have the work, we're ready """
+"""if we already have the work, we're ready"""
mapped_data = dict_from_mappings(edition_data, self.book_mappings)
mapped_data["work"] = work.remote_id
edition_activity = activitypub.Edition(**mapped_data)
@@ -179,7 +179,7 @@ class AbstractConnector(AbstractMinimalConnector):
return edition
def get_or_create_author(self, remote_id):
-""" load that author """
+"""load that author"""
existing = models.Author.find_existing_by_remote_id(remote_id)
if existing:
return existing
@@ -187,29 +187,33 @@ class AbstractConnector(AbstractMinimalConnector):
data = self.get_book_data(remote_id)
mapped_data = dict_from_mappings(data, self.author_mappings)
-activity = activitypub.Author(**mapped_data)
+try:
+activity = activitypub.Author(**mapped_data)
+except activitypub.ActivitySerializerError:
+return None
# this will dedupe
return activity.to_model(model=models.Author)
@abstractmethod
def is_work_data(self, data):
-""" differentiate works and editions """
+"""differentiate works and editions"""
@abstractmethod
def get_edition_from_work_data(self, data):
-""" every work needs at least one edition """
+"""every work needs at least one edition"""
@abstractmethod
def get_work_from_edition_data(self, data):
-""" every edition needs a work """
+"""every edition needs a work"""
@abstractmethod
def get_authors_from_data(self, data):
-""" load author data """
+"""load author data"""
@abstractmethod
def expand_book_data(self, book):
-""" get more info on a book """
+"""get more info on a book"""
def dict_from_mappings(data, mappings):
@@ -222,7 +226,13 @@ def dict_from_mappings(data, mappings):
def get_data(url, params=None):
-""" wrapper for request.get """
+"""wrapper for request.get"""
+# check if the url is blocked
+if models.FederatedServer.is_blocked(url):
+raise ConnectorException(
+"Attempting to load data from blocked url: {:s}".format(url)
+)
try:
resp = requests.get(
url,
@@ -248,7 +258,7 @@ def get_data(url, params=None):
def get_image(url):
-""" wrapper for requesting an image """
+"""wrapper for requesting an image"""
try:
resp = requests.get(
url,
@@ -266,7 +276,7 @@ def get_image(url):
@dataclass
class SearchResult:
-""" standardized search result object """
+"""standardized search result object"""
title: str
key: str
@@ -283,14 +293,14 @@ class SearchResult:
)
def json(self):
-""" serialize a connector for json response """
+"""serialize a connector for json response"""
serialized = asdict(self)
del serialized["connector"]
return serialized
class Mapping:
-""" associate a local database field with a field in an external dataset """
+"""associate a local database field with a field in an external dataset"""
def __init__(self, local_field, remote_field=None, formatter=None):
noop = lambda x: x
@@ -300,7 +310,7 @@ class Mapping:
self.formatter = formatter or noop
def get_value(self, data):
-""" pull a field from incoming json and return the formatted version """
+"""pull a field from incoming json and return the formatted version"""
value = data.get(self.remote_field)
if not value:
return None

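The most substantive change in abstract_connector.py above is the guard added to get_data(), which refuses to fetch from blocked servers before any request goes out. A minimal standalone sketch of that pattern; the blocklist and is_blocked helper here are toys standing in for the models.FederatedServer lookup, not bookwyrm's actual API:

from urllib.parse import urlparse

# toy blocklist standing in for the FederatedServer table
BLOCKED_DOMAINS = {"blocked.example"}


class ConnectorException(Exception):
    """when the connector can't do what was asked"""


def is_blocked(url):
    """stand-in for models.FederatedServer.is_blocked"""
    return urlparse(url).netloc in BLOCKED_DOMAINS


def get_data(url, params=None):
    """mirrors the guard in the hunk above; the real function goes on to call requests.get"""
    if is_blocked(url):
        raise ConnectorException(
            "Attempting to load data from blocked url: {:s}".format(url)
        )
    return {}  # placeholder for the fetched and parsed json


get_data("https://openlibrary.org/works/OL27448W.json")  # allowed
try:
    get_data("https://blocked.example/book/1")
except ConnectorException as err:
    print(err)  # Attempting to load data from blocked url: ...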
bookwyrm/connectors/bookwyrm_connector.py

@@ -4,7 +4,7 @@ from .abstract_connector import AbstractMinimalConnector, SearchResult
class Connector(AbstractMinimalConnector):
-""" this is basically just for search """
+"""this is basically just for search"""
def get_or_create_book(self, remote_id):
edition = activitypub.resolve_remote_id(remote_id, model=models.Edition)

bookwyrm/connectors/connector_manager.py

@@ -1,5 +1,6 @@
""" interface with whatever connectors the app has """
import importlib
+import logging
import re
from urllib.parse import urlparse
@@ -11,13 +12,15 @@ from requests import HTTPError
from bookwyrm import models
from bookwyrm.tasks import app
+logger = logging.getLogger(__name__)
class ConnectorException(HTTPError):
-""" when the connector can't do what was asked """
+"""when the connector can't do what was asked"""
def search(query, min_confidence=0.1):
-""" find books based on arbitary keywords """
+"""find books based on arbitary keywords"""
if not query:
return []
results = []
@@ -37,14 +40,17 @@ def search(query, min_confidence=0.1):
else:
try:
result_set = connector.isbn_search(isbn)
-except (HTTPError, ConnectorException):
-pass
+except Exception as e:  # pylint: disable=broad-except
+logger.exception(e)
+continue
# if no isbn search or results, we fallback to generic search
if result_set in (None, []):
try:
result_set = connector.search(query, min_confidence=min_confidence)
-except (HTTPError, ConnectorException):
+except Exception as e:  # pylint: disable=broad-except
+# we don't want *any* error to crash the whole search page
+logger.exception(e)
continue
# if the search results look the same, ignore them
@@ -61,20 +67,22 @@ def search(query, min_confidence=0.1):
return results
-def local_search(query, min_confidence=0.1, raw=False):
-""" only look at local search results """
+def local_search(query, min_confidence=0.1, raw=False, filters=None):
+"""only look at local search results"""
connector = load_connector(models.Connector.objects.get(local=True))
-return connector.search(query, min_confidence=min_confidence, raw=raw)
+return connector.search(
+query, min_confidence=min_confidence, raw=raw, filters=filters
+)
def isbn_local_search(query, raw=False):
-""" only look at local search results """
+"""only look at local search results"""
connector = load_connector(models.Connector.objects.get(local=True))
return connector.isbn_search(query, raw=raw)
def first_search_result(query, min_confidence=0.1):
-""" search until you find a result that fits """
+"""search until you find a result that fits"""
for connector in get_connectors():
result = connector.search(query, min_confidence=min_confidence)
if result:
@@ -83,13 +91,13 @@ def first_search_result(query, min_confidence=0.1):
def get_connectors():
-""" load all connectors """
+"""load all connectors"""
for info in models.Connector.objects.order_by("priority").all():
yield load_connector(info)
def get_or_create_connector(remote_id):
-""" get the connector related to the object's server """
+"""get the connector related to the object's server"""
url = urlparse(remote_id)
identifier = url.netloc
if not identifier:
@@ -113,7 +121,7 @@ def get_or_create_connector(remote_id):
@app.task
def load_more_data(connector_id, book_id):
-""" background the work of getting all 10,000 editions of LoTR """
+"""background the work of getting all 10,000 editions of LoTR"""
connector_info = models.Connector.objects.get(id=connector_id)
connector = load_connector(connector_info)
book = models.Book.objects.select_subclasses().get(id=book_id)
@@ -121,7 +129,7 @@ def load_more_data(connector_id, book_id):
def load_connector(connector_info):
-""" instantiate the connector class """
+"""instantiate the connector class"""
connector = importlib.import_module(
"bookwyrm.connectors.%s" % connector_info.connector_file
)
@@ -131,6 +139,6 @@ def load_connector(connector_info):
@receiver(signals.post_save, sender="bookwyrm.FederatedServer")
# pylint: disable=unused-argument
def create_connector(sender, instance, created, *args, **kwargs):
-""" create a connector to an external bookwyrm server """
+"""create a connector to an external bookwyrm server"""
if instance.application_type == "bookwyrm":
get_or_create_connector("https://{:s}".format(instance.server_name))

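Condensed for illustration, the new exception handling in search() above boils down to this pattern: per connector, try the ISBN search, fall back to free-text search, and log-and-skip any failure so one broken connector cannot take down the whole results page. The connector classes below are toys, not bookwyrm's:

import logging

logger = logging.getLogger(__name__)


def search_all(connectors, query, isbn=None, min_confidence=0.1):
    """try isbn search first, fall back to free-text search, per connector"""
    results = []
    for connector in connectors:
        result_set = None
        if isbn:
            try:
                result_set = connector.isbn_search(isbn)
            except Exception as err:  # pylint: disable=broad-except
                logger.exception(err)
                continue
        # if no isbn search or results, fall back to generic search
        if result_set in (None, []):
            try:
                result_set = connector.search(query, min_confidence=min_confidence)
            except Exception as err:  # pylint: disable=broad-except
                # one broken connector shouldn't crash the whole search page
                logger.exception(err)
                continue
        results.extend(result_set)
    return results


class FlakyConnector:
    def search(self, query, min_confidence=0.1):
        raise RuntimeError("remote API is down")


class GoodConnector:
    def search(self, query, min_confidence=0.1):
        return ["The Fifth Season"]


assert search_all([FlakyConnector(), GoodConnector()], "fifth season") == [
    "The Fifth Season"
]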
bookwyrm/connectors/openlibrary.py

@@ -9,7 +9,7 @@ from .openlibrary_languages import languages
class Connector(AbstractConnector):
-""" instantiate a connector for OL """
+"""instantiate a connector for OL"""
def __init__(self, identifier):
super().__init__(identifier)
@@ -59,7 +59,7 @@ class Connector(AbstractConnector):
]
def get_remote_id_from_data(self, data):
-""" format a url from an openlibrary id field """
+"""format a url from an openlibrary id field"""
try:
key = data["key"]
except KeyError:
@@ -87,16 +87,19 @@ class Connector(AbstractConnector):
return get_data(url)
def get_authors_from_data(self, data):
-""" parse author json and load or create authors """
+"""parse author json and load or create authors"""
for author_blob in data.get("authors", []):
author_blob = author_blob.get("author", author_blob)
# this id is "/authors/OL1234567A"
author_id = author_blob["key"]
url = "%s%s" % (self.base_url, author_id)
-yield self.get_or_create_author(url)
+author = self.get_or_create_author(url)
+if not author:
+continue
+yield author
def get_cover_url(self, cover_blob, size="L"):
-""" ask openlibrary for the cover """
+"""ask openlibrary for the cover"""
if not cover_blob:
return None
cover_id = cover_blob[0]
@@ -138,7 +141,7 @@ class Connector(AbstractConnector):
)
def load_edition_data(self, olkey):
-""" query openlibrary for editions of a work """
+"""query openlibrary for editions of a work"""
url = "%s/works/%s/editions" % (self.books_url, olkey)
return get_data(url)
@@ -163,7 +166,7 @@ class Connector(AbstractConnector):
def ignore_edition(edition_data):
-""" don't load a million editions that have no metadata """
+"""don't load a million editions that have no metadata"""
# an isbn, we love to see it
if edition_data.get("isbn_13") or edition_data.get("isbn_10"):
return False
@@ -182,19 +185,19 @@ def ignore_edition(edition_data):
def get_description(description_blob):
-""" descriptions can be a string or a dict """
+"""descriptions can be a string or a dict"""
if isinstance(description_blob, dict):
return description_blob.get("value")
return description_blob
def get_openlibrary_key(key):
-""" convert /books/OL27320736M into OL27320736M """
+"""convert /books/OL27320736M into OL27320736M"""
return key.split("/")[-1]
def get_languages(language_blob):
-""" /language/eng -> English """
+"""/language/eng -> English"""
langs = []
for lang in language_blob:
langs.append(languages.get(lang.get("key", ""), None))
@@ -202,7 +205,7 @@ def get_languages(language_blob):
def pick_default_edition(options):
-""" favor physical copies with covers in english """
+"""favor physical copies with covers in english"""
if not options:
return None
if len(options) == 1:

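The change to get_authors_from_data() above pairs with the earlier abstract_connector.py hunk: get_or_create_author() can now return None for an author that failed to deserialize, so the generator skips those instead of yielding None to callers. A toy version of that guard, with a dict lookup standing in for the real get_or_create_author:

def get_authors(author_ids, get_or_create_author):
    for author_id in author_ids:
        author = get_or_create_author(author_id)
        if not author:
            # unresolvable author: skip it rather than yield None
            continue
        yield author


lookup = {"OL1234567A": "N.K. Jemisin"}.get
assert list(get_authors(["OL1234567A", "OL0000000A"], lookup)) == ["N.K. Jemisin"]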
bookwyrm/connectors/self_connector.py

@@ -10,18 +10,19 @@ from .abstract_connector import AbstractConnector, SearchResult
class Connector(AbstractConnector):
-""" instantiate a connector """
+"""instantiate a connector"""
# pylint: disable=arguments-differ
-def search(self, query, min_confidence=0.1, raw=False):
-""" search your local database """
+def search(self, query, min_confidence=0.1, raw=False, filters=None):
+"""search your local database"""
+filters = filters or []
if not query:
return []
# first, try searching unqiue identifiers
-results = search_identifiers(query)
+results = search_identifiers(query, *filters)
if not results:
# then try searching title/author
-results = search_title_author(query, min_confidence)
+results = search_title_author(query, min_confidence, *filters)
search_results = []
for result in results:
if raw:
@@ -35,7 +36,7 @@ class Connector(AbstractConnector):
return search_results
def isbn_search(self, query, raw=False):
-""" search your local database """
+"""search your local database"""
if not query:
return []
@@ -87,26 +88,26 @@ class Connector(AbstractConnector):
return None
def parse_isbn_search_data(self, data):
-""" it's already in the right format, don't even worry about it """
+"""it's already in the right format, don't even worry about it"""
return data
def parse_search_data(self, data):
-""" it's already in the right format, don't even worry about it """
+"""it's already in the right format, don't even worry about it"""
return data
def expand_book_data(self, book):
pass
-def search_identifiers(query):
-""" tries remote_id, isbn; defined as dedupe fields on the model """
-filters = [
+def search_identifiers(query, *filters):
+"""tries remote_id, isbn; defined as dedupe fields on the model"""
+or_filters = [
{f.name: query}
for f in models.Edition._meta.get_fields()
if hasattr(f, "deduplication_field") and f.deduplication_field
]
results = models.Edition.objects.filter(
-reduce(operator.or_, (Q(**f) for f in filters))
+*filters, reduce(operator.or_, (Q(**f) for f in or_filters))
).distinct()
# when there are multiple editions of the same work, pick the default.
@@ -114,8 +115,8 @@ def search_identifiers(query):
return results.filter(parent_work__default_edition__id=F("id")) or results
-def search_title_author(query, min_confidence):
-""" searches for title and author """
+def search_title_author(query, min_confidence, *filters):
+"""searches for title and author"""
vector = (
SearchVector("title", weight="A")
+ SearchVector("subtitle", weight="B")
@@ -126,7 +127,7 @@ def search_title_author(query, min_confidence):
results = (
models.Edition.objects.annotate(search=vector)
.annotate(rank=SearchRank(vector, query))
-.filter(rank__gt=min_confidence)
+.filter(*filters, rank__gt=min_confidence)
.order_by("-rank")
)
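The core idiom in search_identifiers() above is folding a list of per-field lookups into one OR expression with functools.reduce, while any extra positional filters passed down from search() are AND-ed in by Django's .filter(). In bookwyrm the operands are Q objects; the fold works the same on plain predicates, as in this dependency-free sketch with made-up records:

import operator
from functools import reduce

records = [
    {"remote_id": "https://example.net/book/1", "isbn_13": "9780441569595"},
    {"remote_id": "https://example.net/book/2", "isbn_13": "9780765387561"},
]
query = "9780765387561"

# one lookup per dedupe field, OR-ed together like Q(...) | Q(...) | ...
or_filters = [
    lambda record, field=f: record.get(field) == query
    for f in ("remote_id", "isbn_13")
]

matches = [
    record
    for record in records
    if reduce(operator.or_, (check(record) for check in or_filters))
]
assert matches == [records[1]]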