diff --git a/bookwyrm/books_manager.py b/bookwyrm/books_manager.py index bfc543de..37a31766 100644 --- a/bookwyrm/books_manager.py +++ b/bookwyrm/books_manager.py @@ -64,14 +64,14 @@ def load_more_data(book_id): connector.expand_book_data(book) -def search(query): +def search(query, min_confidence=0.1): ''' find books based on arbitary keywords ''' results = [] dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year) result_index = set() for connector in get_connectors(): try: - result_set = connector.search(query) + result_set = connector.search(query, min_confidence=min_confidence) except HTTPError: continue @@ -87,16 +87,16 @@ def search(query): return results -def local_search(query): +def local_search(query, min_confidence=0.1): ''' only look at local search results ''' connector = load_connector(models.Connector.objects.get(local=True)) - return connector.search(query) + return connector.search(query, min_confidence=min_confidence) -def first_search_result(query): +def first_search_result(query, min_confidence=0.1): ''' search until you find a result that fits ''' for connector in get_connectors(): - result = connector.search(query) + result = connector.search(query, min_confidence=min_confidence) if result: return result[0] return None diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 25db648c..a34eb301 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -1,15 +1,17 @@ ''' functionality outline for a book data connector ''' from abc import ABC, abstractmethod +from dataclasses import dataclass from dateutil import parser import pytz import requests +from requests import HTTPError from django.db import transaction from bookwyrm import models -class ConnectorException(Exception): +class ConnectorException(HTTPError): ''' when the connector can't do what was asked ''' @@ -50,7 +52,7 @@ class AbstractConnector(ABC): return True - def search(self, query): + def search(self, query, min_confidence=None): ''' free text search ''' resp = requests.get( '%s%s' % (self.search_url, query), @@ -155,9 +157,11 @@ class AbstractConnector(ABC): ''' for creating a new book or syncing with data ''' book = update_from_mappings(book, data, self.book_mappings) + author_text = [] for author in self.get_authors_from_data(data): book.authors.add(author) - book.author_text = ', '.join(a.display_name for a in book.authors.all()) + author_text.append(author.display_name) + book.author_text = ', '.join(author_text) book.save() if not update_cover: @@ -287,25 +291,29 @@ def get_date(date_string): def get_data(url): ''' wrapper for request.get ''' - resp = requests.get( - url, - headers={ - 'Accept': 'application/json; charset=utf-8', - }, - ) + try: + resp = requests.get( + url, + headers={ + 'Accept': 'application/json; charset=utf-8', + }, + ) + except ConnectionError: + raise ConnectorException() if not resp.ok: resp.raise_for_status() data = resp.json() return data +@dataclass class SearchResult: ''' standardized search result object ''' - def __init__(self, title, key, author, year): - self.title = title - self.key = key - self.author = author - self.year = year + title: str + key: str + author: str + year: str + confidence: int = 1 def __repr__(self): return "".format( diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index d70ab3e2..0ae3ce35 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -129,10 +129,10 @@ class Connector(AbstractConnector): key = self.books_url + search_result['key'] author = search_result.get('author_name') or ['Unknown'] return SearchResult( - search_result.get('title'), - key, - ', '.join(author), - search_result.get('first_publish_year'), + title=search_result.get('title'), + key=key, + author=', '.join(author), + year=search_result.get('first_publish_year'), ) diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index 2711bb1a..0e77ecf6 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -7,7 +7,7 @@ from .abstract_connector import AbstractConnector, SearchResult class Connector(AbstractConnector): ''' instantiate a connector ''' - def search(self, query): + def search(self, query, min_confidence=0.1): ''' right now you can't search bookwyrm sorry, but when that gets implemented it will totally rule ''' vector = SearchVector('title', weight='A') +\ @@ -28,7 +28,7 @@ class Connector(AbstractConnector): ).annotate( rank=SearchRank(vector, query) ).filter( - rank__gt=0 + rank__gt=min_confidence ).order_by('-rank') results = results.filter(default=True) or results @@ -42,11 +42,12 @@ class Connector(AbstractConnector): def format_search_result(self, search_result): return SearchResult( - search_result.title, - search_result.local_id, - search_result.author_text, - search_result.published_date.year if \ + title=search_result.title, + key=search_result.local_id, + author=search_result.author_text, + year=search_result.published_date.year if \ search_result.published_date else None, + confidence=search_result.rank, ) diff --git a/bookwyrm/goodreads_import.py b/bookwyrm/goodreads_import.py index 7b64baa3..73057e43 100644 --- a/bookwyrm/goodreads_import.py +++ b/bookwyrm/goodreads_import.py @@ -42,13 +42,10 @@ def import_data(job_id): if item.book: item.save() results.append(item) + # shelves book and handles reviews + outgoing.handle_imported_book(job.user, item) else: - item.fail_reason = "Could not match book on OpenLibrary" + item.fail_reason = "Could not find a match for book" item.save() - - status = outgoing.handle_import_books(job.user, results) - if status: - job.import_status = status - job.save() finally: create_notification(job.user, 'IMPORT', related_import=job) diff --git a/bookwyrm/migrations/0058_remove_importjob_import_status.py b/bookwyrm/migrations/0058_remove_importjob_import_status.py new file mode 100644 index 00000000..61f41546 --- /dev/null +++ b/bookwyrm/migrations/0058_remove_importjob_import_status.py @@ -0,0 +1,17 @@ +# Generated by Django 3.0.7 on 2020-10-29 23:48 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('bookwyrm', '0057_auto_20201026_2131'), + ] + + operations = [ + migrations.RemoveField( + model_name='importjob', + name='import_status', + ), + ] diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 085cfea6..240e0694 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -40,8 +40,7 @@ class ImportJob(models.Model): user = models.ForeignKey(User, on_delete=models.CASCADE) created_date = models.DateTimeField(default=timezone.now) task_id = models.CharField(max_length=100, null=True) - import_status = models.ForeignKey( - 'Status', null=True, on_delete=models.PROTECT) + class ImportItem(models.Model): ''' a single line of a csv being imported ''' @@ -64,13 +63,17 @@ class ImportItem(models.Model): def get_book_from_isbn(self): ''' search by isbn ''' - search_result = books_manager.first_search_result(self.isbn) + search_result = books_manager.first_search_result( + self.isbn, min_confidence=0.5 + ) if search_result: try: # don't crash the import when the connector fails return books_manager.get_or_create_book(search_result.key) except ConnectorException: pass + return None + def get_book_from_title_author(self): ''' search by title and author ''' @@ -78,12 +81,16 @@ class ImportItem(models.Model): self.data['Title'], self.data['Author'] ) - search_result = books_manager.first_search_result(search_term) + search_result = books_manager.first_search_result( + search_term, min_confidence=0.5 + ) if search_result: try: return books_manager.get_or_create_book(search_result.key) except ConnectorException: pass + return None + @property def isbn(self): @@ -95,6 +102,7 @@ class ImportItem(models.Model): ''' the goodreads shelf field ''' if self.data['Exclusive Shelf']: return GOODREADS_SHELVES.get(self.data['Exclusive Shelf']) + return None @property def review(self): @@ -111,12 +119,14 @@ class ImportItem(models.Model): ''' when the book was added to this dataset ''' if self.data['Date Added']: return dateutil.parser.parse(self.data['Date Added']) + return None @property def date_read(self): ''' the date a book was completed ''' if self.data['Date Read']: return dateutil.parser.parse(self.data['Date Read']) + return None @property def reads(self): @@ -126,6 +136,7 @@ class ImportItem(models.Model): return [ReadThrough(start_date=self.date_added)] if self.date_read: return [ReadThrough( + start_date=self.date_added, finish_date=self.date_read, )] return [] diff --git a/bookwyrm/outgoing.py b/bookwyrm/outgoing.py index 4681a670..eca3a2c8 100644 --- a/bookwyrm/outgoing.py +++ b/bookwyrm/outgoing.py @@ -155,51 +155,49 @@ def handle_unshelve(user, book, shelf): broadcast(user, activity) -def handle_import_books(user, items): +def handle_imported_book(user, item): ''' process a goodreads csv and then post about it ''' - new_books = [] - for item in items: - if item.shelf: - desired_shelf = models.Shelf.objects.get( - identifier=item.shelf, - user=user - ) - if isinstance(item.book, models.Work): - item.book = item.book.default_edition - if not item.book: - continue - shelf_book, created = models.ShelfBook.objects.get_or_create( - book=item.book, shelf=desired_shelf, added_by=user) - if created: - new_books.append(item.book) - activity = shelf_book.to_add_activity(user) - broadcast(user, activity) + if isinstance(item.book, models.Work): + item.book = item.book.default_edition + if not item.book: + return - if item.rating or item.review: - review_title = 'Review of {!r} on Goodreads'.format( - item.book.title, - ) if item.review else '' + if item.shelf: + desired_shelf = models.Shelf.objects.get( + identifier=item.shelf, + user=user + ) + # shelve the book if it hasn't been shelved already + shelf_book, created = models.ShelfBook.objects.get_or_create( + book=item.book, shelf=desired_shelf, added_by=user) + if created: + broadcast(user, shelf_book.to_add_activity(user)) - models.Review.objects.create( - user=user, - book=item.book, - name=review_title, - content=item.review, - rating=item.rating, - ) - for read in item.reads: - read.book = item.book - read.user = user - read.save() + # only add new read-throughs if the item isn't already shelved + for read in item.reads: + read.book = item.book + read.user = user + read.save() - if new_books: - message = 'imported {} books'.format(len(new_books)) - status = create_generated_note(user, message, mention_books=new_books) - status.save() + if item.rating or item.review: + review_title = 'Review of {!r} on Goodreads'.format( + item.book.title, + ) if item.review else '' - broadcast(user, status.to_create_activity(user)) - return status - return None + # we don't know the publication date of the review, + # but "now" is a bad guess + published_date_guess = item.date_read or item.date_added + review = models.Review.objects.create( + user=user, + book=item.book, + name=review_title, + content=item.review, + rating=item.rating, + published_date=published_date_guess, + ) + # we don't need to send out pure activities because non-bookwyrm + # instances don't need this data + broadcast(user, review.to_create_activity(user)) def handle_delete_status(user, status): diff --git a/bookwyrm/tests/models/test_import_model.py b/bookwyrm/tests/models/test_import_model.py index 5e488199..1d5aaa72 100644 --- a/bookwyrm/tests/models/test_import_model.py +++ b/bookwyrm/tests/models/test_import_model.py @@ -24,7 +24,7 @@ class ImportJob(TestCase): 'Number of Pages': 416, 'Year Published': 2019, 'Original Publication Year': 2019, - 'Date Read': '2019/04/09', + 'Date Read': '2019/04/12', 'Date Added': '2019/04/09', 'Bookshelves': '', 'Bookshelves with positions': '', @@ -97,11 +97,9 @@ class ImportJob(TestCase): self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date) def test_read_reads(self): - expected = [models.ReadThrough( - finish_date=datetime.datetime(2019, 4, 9, 0, 0))] actual = models.ImportItem.objects.get(index=2) - self.assertEqual(actual.reads[0].start_date, expected[0].start_date) - self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date) + self.assertEqual(actual.reads[0].start_date, datetime.datetime(2019, 4, 9, 0, 0)) + self.assertEqual(actual.reads[0].finish_date, datetime.datetime(2019, 4, 12, 0, 0)) def test_unread_reads(self): expected = [] diff --git a/bookwyrm/views.py b/bookwyrm/views.py index 9bf02959..b1bb25f4 100644 --- a/bookwyrm/views.py +++ b/bookwyrm/views.py @@ -489,7 +489,8 @@ def book_page(request, book_id): ).values_list('identifier', flat=True) readthroughs = models.ReadThrough.objects.filter( - user=request.user + user=request.user, + book=book, ).order_by('start_date')