Fixes loading more data
This commit is contained in:
		| @@ -50,7 +50,7 @@ class Work(Book): | ||||
|     ''' work instance of a book object ''' | ||||
|     lccn: str = '' | ||||
|     defaultEdition: str = '' | ||||
|     editions: List[str] | ||||
|     editions: List[str] = field(default_factory=lambda: []) | ||||
|     type: str = 'Work' | ||||
|  | ||||
|  | ||||
| @@ -58,9 +58,9 @@ class Work(Book): | ||||
| class Author(ActivityObject): | ||||
|     ''' author of a book ''' | ||||
|     name: str | ||||
|     born: str = '' | ||||
|     died: str = '' | ||||
|     aliases: str = '' | ||||
|     born: str = None | ||||
|     died: str = None | ||||
|     aliases: List[str] = field(default_factory=lambda: []) | ||||
|     bio: str = '' | ||||
|     openlibraryKey: str = '' | ||||
|     wikipediaLink: str = '' | ||||
|   | ||||
| @@ -1,11 +1,9 @@ | ||||
| ''' functionality outline for a book data connector ''' | ||||
| from abc import ABC, abstractmethod | ||||
| from dataclasses import dataclass | ||||
| import pytz | ||||
| from urllib3.exceptions import RequestError | ||||
|  | ||||
| from django.db import transaction | ||||
| from dateutil import parser | ||||
| import requests | ||||
| from requests import HTTPError | ||||
| from requests.exceptions import SSLError | ||||
| @@ -102,12 +100,10 @@ class AbstractConnector(AbstractMinimalConnector): | ||||
|         if self.is_work_data(data): | ||||
|             try: | ||||
|                 edition_data = self.get_edition_from_work_data(data) | ||||
|                 edition_data = dict_from_mappings(\ | ||||
|                         edition_data, self.book_mappings) | ||||
|             except KeyError: | ||||
|                 # hack: re-use the work data as the edition data | ||||
|                 # this is why remote ids aren't necessarily unique | ||||
|                 edition_data = mapped_data | ||||
|                 edition_data = data | ||||
|             work_data = mapped_data | ||||
|         else: | ||||
|             try: | ||||
| @@ -115,75 +111,76 @@ class AbstractConnector(AbstractMinimalConnector): | ||||
|                 work_data = dict_from_mappings(work_data, self.book_mappings) | ||||
|             except KeyError: | ||||
|                 work_data = mapped_data | ||||
|             edition_data = mapped_data | ||||
|             edition_data = data | ||||
|  | ||||
|         if not work_data or not edition_data: | ||||
|             raise ConnectorException('Unable to load book data: %s' % remote_id) | ||||
|  | ||||
|         # create activitypub object | ||||
|         work_activity = activitypub.Work(**work_data) | ||||
|         edition_activity = activitypub.Edition(**edition_data) | ||||
|  | ||||
|         # this will dedupe automatically | ||||
|         work = work_activity.to_model(models.Work, save=False) | ||||
|         edition = edition_activity.to_model(models.Edition, save=False) | ||||
|  | ||||
|         edition.parent_work = work | ||||
|         work.default_edition = edition | ||||
|  | ||||
|         work.save() | ||||
|         edition.save() | ||||
|  | ||||
|         work = work_activity.to_model(models.Work) | ||||
|         for author in self.get_authors_from_data(data): | ||||
|             work.authors.add(author) | ||||
|         return self.create_edition_from_data(work, edition_data) | ||||
|  | ||||
|  | ||||
|     def create_edition_from_data(self, work, edition_data): | ||||
|         ''' if we already have the work, we're ready ''' | ||||
|         mapped_data = dict_from_mappings(edition_data, self.book_mappings) | ||||
|         mapped_data['work'] = work.remote_id | ||||
|         edition_activity = activitypub.Edition(**mapped_data) | ||||
|         edition = edition_activity.to_model(models.Edition) | ||||
|         edition.connector = self.connector | ||||
|         edition.save() | ||||
|  | ||||
|         work.default_edition = edition | ||||
|         work.save() | ||||
|  | ||||
|         for author in self.get_authors_from_data(edition_data): | ||||
|             edition.authors.add(author) | ||||
|         if not edition.authors.exists() and work.authors.exists(): | ||||
|             edition.authors.add(work.authors.all()) | ||||
|  | ||||
|         return edition | ||||
|  | ||||
|  | ||||
|     def get_or_create_author(self, remote_id): | ||||
|         ''' load that author ''' | ||||
|         existing = models.Author.find_exising_by_remote_id(remote_id) | ||||
|         existing = models.Author.find_existing_by_remote_id(remote_id) | ||||
|         if existing: | ||||
|             return existing | ||||
|  | ||||
|         data = get_data(remote_id) | ||||
|  | ||||
|         author_activity = dict_from_mappings(data, self.author_mappings) | ||||
|         mapped_data = dict_from_mappings(data, self.author_mappings) | ||||
|         activity = activitypub.Author(**mapped_data) | ||||
|         # this will dedupe | ||||
|         return activitypub.Author(**author_activity).to_model() | ||||
|         return activity.to_model(models.Author) | ||||
|  | ||||
|  | ||||
|     @abstractmethod | ||||
|     def is_work_data(self, data): | ||||
|         ''' differentiate works and editions ''' | ||||
|  | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_edition_from_work_data(self, data): | ||||
|         ''' every work needs at least one edition ''' | ||||
|  | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_work_from_edition_date(self, data): | ||||
|         ''' every edition needs a work ''' | ||||
|  | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_authors_from_data(self, data): | ||||
|         ''' load author data ''' | ||||
|  | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_cover_from_data(self, data): | ||||
|         ''' load cover ''' | ||||
|  | ||||
|     @abstractmethod | ||||
|     def expand_book_data(self, book): | ||||
|         ''' get more info on a book ''' | ||||
|  | ||||
|  | ||||
| def dict_from_mappings(self, data, mappings): | ||||
| def dict_from_mappings(data, mappings): | ||||
|     ''' create a dict in Activitypub format, using mappings supplies by | ||||
|     the subclass ''' | ||||
|     result = {} | ||||
| @@ -250,4 +247,9 @@ class Mapping: | ||||
|     def get_value(self, data): | ||||
|         ''' pull a field from incoming json and return the formatted version ''' | ||||
|         value = data.get(self.remote_field) | ||||
|         if not value: | ||||
|             return None | ||||
|         try: | ||||
|             return self.formatter(value) | ||||
|         except:# pylint: disable=bare-except | ||||
|             return None | ||||
|   | ||||
| @@ -1,13 +1,9 @@ | ||||
| ''' openlibrary data connector ''' | ||||
| import re | ||||
| import requests | ||||
|  | ||||
| from django.core.files.base import ContentFile | ||||
|  | ||||
| from bookwyrm import models | ||||
| from .abstract_connector import AbstractConnector, SearchResult, Mapping | ||||
| from .abstract_connector import ConnectorException, dict_from_mappings | ||||
| from .abstract_connector import get_data, update_from_mappings | ||||
| from .abstract_connector import ConnectorException, get_data | ||||
| from .openlibrary_languages import languages | ||||
|  | ||||
|  | ||||
| @@ -17,8 +13,12 @@ class Connector(AbstractConnector): | ||||
|         super().__init__(identifier) | ||||
|  | ||||
|         get_first = lambda a: a[0] | ||||
|         get_remote_id = lambda a: self.base_url + a | ||||
|         self.book_mappings = [ | ||||
|             Mapping('title'), | ||||
|             Mapping('id', remote_field='key', formatter=get_remote_id), | ||||
|             Mapping( | ||||
|                 'cover', remote_field='covers', formatter=self.get_cover_url), | ||||
|             Mapping('sortTitle', remote_field='sort_title'), | ||||
|             Mapping('subtitle'), | ||||
|             Mapping('description', formatter=get_description), | ||||
| @@ -50,7 +50,12 @@ class Connector(AbstractConnector): | ||||
|         ] | ||||
|  | ||||
|         self.author_mappings = [ | ||||
|             Mapping('id', remote_field='key', formatter=get_remote_id), | ||||
|             Mapping('name'), | ||||
|             Mapping( | ||||
|                 'openlibraryKey', remote_field='key', | ||||
|                 formatter=get_openlibrary_key | ||||
|             ), | ||||
|             Mapping('born', remote_field='birth_date'), | ||||
|             Mapping('died', remote_field='death_date'), | ||||
|             Mapping('bio', formatter=get_description), | ||||
| @@ -58,6 +63,7 @@ class Connector(AbstractConnector): | ||||
|  | ||||
|  | ||||
|     def get_remote_id_from_data(self, data): | ||||
|         ''' format a url from an openlibrary id field ''' | ||||
|         try: | ||||
|             key = data['key'] | ||||
|         except KeyError: | ||||
| @@ -93,24 +99,16 @@ class Connector(AbstractConnector): | ||||
|         for author_blob in data.get('authors', []): | ||||
|             author_blob = author_blob.get('author', author_blob) | ||||
|             # this id is "/authors/OL1234567A" | ||||
|             author_id = author_blob['key'].split('/')[-1] | ||||
|             author_id = author_blob['key'] | ||||
|             url = '%s/%s.json' % (self.base_url, author_id) | ||||
|             yield self.get_or_create_author(url) | ||||
|  | ||||
|  | ||||
|     def get_cover_from_data(self, data): | ||||
|     def get_cover_url(self, cover_blob): | ||||
|         ''' ask openlibrary for the cover ''' | ||||
|         if not data.get('covers'): | ||||
|             return None | ||||
|  | ||||
|         cover_id = data.get('covers')[0] | ||||
|         cover_id = cover_blob[0] | ||||
|         image_name = '%s-M.jpg' % cover_id | ||||
|         url = '%s/b/id/%s' % (self.covers_url, image_name) | ||||
|         response = requests.get(url) | ||||
|         if not response.ok: | ||||
|             response.raise_for_status() | ||||
|         image_content = ContentFile(response.content) | ||||
|         return [image_name, image_content] | ||||
|         return '%s/b/id/%s' % (self.covers_url, image_name) | ||||
|  | ||||
|  | ||||
|     def parse_search_data(self, data): | ||||
| @@ -144,19 +142,7 @@ class Connector(AbstractConnector): | ||||
|         # we can mass download edition data from OL to avoid repeatedly querying | ||||
|         edition_options = self.load_edition_data(work.openlibrary_key) | ||||
|         for edition_data in edition_options.get('entries'): | ||||
|             olkey = edition_data.get('key').split('/')[-1] | ||||
|             # make sure the edition isn't already in the database | ||||
|             if models.Edition.objects.filter(openlibrary_key=olkey).count(): | ||||
|                 continue | ||||
|  | ||||
|             # creates and populates the book from the data | ||||
|             edition = self.create_book(olkey, edition_data, models.Edition) | ||||
|             # ensures that the edition is associated with the work | ||||
|             edition.parent_work = work | ||||
|             edition.save() | ||||
|             # get author data from the work if it's missing from the edition | ||||
|             if not edition.authors and work.authors: | ||||
|                 edition.authors.set(work.authors.all()) | ||||
|             self.create_edition_from_data(work, edition_data) | ||||
|  | ||||
|  | ||||
| def get_description(description_blob): | ||||
|   | ||||
| @@ -223,6 +223,8 @@ def resolve_book(request): | ||||
|     remote_id = request.POST.get('remote_id') | ||||
|     connector = books_manager.get_or_create_connector(remote_id) | ||||
|     book = connector.get_or_create_book(remote_id) | ||||
|     if book.connector: | ||||
|         books_manager.load_more_data.delay(book.id) | ||||
|  | ||||
|     return redirect('/book/%d' % book.id) | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user