Move importers into module

2021-03-30 08:56:25 -07:00
parent 1f0c4f86c2
commit 754ccaedd6
8 changed files with 16 additions and 10 deletions
--- a/bookwyrm/importers/init.py
+++ b/bookwyrm/importers/init.py
@@ -0,0 +1,5 @@
+""" import classes """
+
+from .importer import Importer
+from .goodreads_import import GoodreadsImporter
+from .librarything_import import LibrarythingImporter
--- a/bookwyrm/importers/goodreads_import.py
+++ b/bookwyrm/importers/goodreads_import.py
@@ -0,0 +1,16 @@
+""" handle reading a csv from goodreads """
+from . import Importer
+
+
+class GoodreadsImporter(Importer):
+    """GoodReads is the default importer, thus Importer follows its structure.
+    For a more complete example of overriding see librarything_import.py"""
+
+    service = "GoodReads"
+
+    def parse_fields(self, entry):
+        """ handle the specific fields in goodreads csvs """
+        entry.update({"import_source": self.service})
+        # add missing 'Date Started' field
+        entry.update({"Date Started": None})
+        return entry
--- a/bookwyrm/importers/importer.py
+++ b/bookwyrm/importers/importer.py
@@ -0,0 +1,139 @@
+""" handle reading a csv from an external service, defaults are from GoodReads """
+import csv
+import logging
+
+from bookwyrm import models
+from bookwyrm.models import ImportJob, ImportItem
+from bookwyrm.tasks import app
+
+logger = logging.getLogger(__name__)
+
+
+class Importer:
+    """ Generic class for csv data import from an outside service """
+
+    service = "Unknown"
+    delimiter = ","
+    encoding = "UTF-8"
+    mandatory_fields = ["Title", "Author"]
+
+    def create_job(self, user, csv_file, include_reviews, privacy):
+        """ check over a csv and creates a database entry for the job"""
+        job = ImportJob.objects.create(
+            user=user, include_reviews=include_reviews, privacy=privacy
+        )
+        for index, entry in enumerate(
+            list(csv.DictReader(csv_file, delimiter=self.delimiter))
+        ):
+            if not all(x in entry for x in self.mandatory_fields):
+                raise ValueError("Author and title must be in data.")
+            entry = self.parse_fields(entry)
+            self.save_item(job, index, entry)
+        return job
+
+    def save_item(self, job, index, data):  # pylint: disable=no-self-use
+        """ creates and saves an import item """
+        ImportItem(job=job, index=index, data=data).save()
+
+    def parse_fields(self, entry):
+        """ updates csv data with additional info """
+        entry.update({"import_source": self.service})
+        return entry
+
+    def create_retry_job(self, user, original_job, items):
+        """ retry items that didn't import """
+        job = ImportJob.objects.create(
+            user=user,
+            include_reviews=original_job.include_reviews,
+            privacy=original_job.privacy,
+            retry=True,
+        )
+        for item in items:
+            self.save_item(job, item.index, item.data)
+        return job
+
+    def start_import(self, job):
+        """ initalizes a csv import job """
+        result = import_data.delay(self.service, job.id)
+        job.task_id = result.id
+        job.save()
+
+
+@app.task
+def import_data(source, job_id):
+    """ does the actual lookup work in a celery task """
+    job = ImportJob.objects.get(id=job_id)
+    try:
+        for item in job.items.all():
+            try:
+                item.resolve()
+            except Exception as e:  # pylint: disable=broad-except
+                logger.exception(e)
+                item.fail_reason = "Error loading book"
+                item.save()
+                continue
+
+            if item.book:
+                item.save()
+
+                # shelves book and handles reviews
+                handle_imported_book(
+                    source, job.user, item, job.include_reviews, job.privacy
+                )
+            else:
+                item.fail_reason = "Could not find a match for book"
+                item.save()
+    finally:
+        job.complete = True
+        job.save()
+
+
+def handle_imported_book(source, user, item, include_reviews, privacy):
+    """ process a csv and then post about it """
+    if isinstance(item.book, models.Work):
+        item.book = item.book.default_edition
+    if not item.book:
+        return
+
+    existing_shelf = models.ShelfBook.objects.filter(book=item.book, user=user).exists()
+
+    # shelve the book if it hasn't been shelved already
+    if item.shelf and not existing_shelf:
+        desired_shelf = models.Shelf.objects.get(identifier=item.shelf, user=user)
+        models.ShelfBook.objects.create(book=item.book, shelf=desired_shelf, user=user)
+
+    for read in item.reads:
+        # check for an existing readthrough with the same dates
+        if models.ReadThrough.objects.filter(
+            user=user,
+            book=item.book,
+            start_date=read.start_date,
+            finish_date=read.finish_date,
+        ).exists():
+            continue
+        read.book = item.book
+        read.user = user
+        read.save()
+
+    if include_reviews and (item.rating or item.review):
+        review_title = (
+            "Review of {!r} on {!r}".format(
+                item.book.title,
+                source,
+            )
+            if item.review
+            else ""
+        )
+
+        # we don't know the publication date of the review,
+        # but "now" is a bad guess
+        published_date_guess = item.date_read or item.date_added
+        models.Review.objects.create(
+            user=user,
+            book=item.book,
+            name=review_title,
+            content=item.review,
+            rating=item.rating,
+            published_date=published_date_guess,
+            privacy=privacy,
+        )
--- a/bookwyrm/importers/librarything_import.py
+++ b/bookwyrm/importers/librarything_import.py
@@ -0,0 +1,42 @@
+""" handle reading a csv from librarything """
+import re
+import math
+
+from . import Importer
+
+
+class LibrarythingImporter(Importer):
+    """ csv downloads from librarything """
+
+    service = "LibraryThing"
+    delimiter = "\t"
+    encoding = "ISO-8859-1"
+    # mandatory_fields : fields matching the book title and author
+    mandatory_fields = ["Title", "Primary Author"]
+
+    def parse_fields(self, entry):
+        """ custom parsing for librarything """
+        data = {}
+        data["import_source"] = self.service
+        data["Book Id"] = entry["Book Id"]
+        data["Title"] = entry["Title"]
+        data["Author"] = entry["Primary Author"]
+        data["ISBN13"] = entry["ISBN"]
+        data["My Review"] = entry["Review"]
+        if entry["Rating"]:
+            data["My Rating"] = math.ceil(float(entry["Rating"]))
+        else:
+            data["My Rating"] = ""
+        data["Date Added"] = re.sub(r"\[|\]", "", entry["Entry Date"])
+        data["Date Started"] = re.sub(r"\[|\]", "", entry["Date Started"])
+        data["Date Read"] = re.sub(r"\[|\]", "", entry["Date Read"])
+
+        data["Exclusive Shelf"] = None
+        if data["Date Read"]:
+            data["Exclusive Shelf"] = "read"
+        elif data["Date Started"]:
+            data["Exclusive Shelf"] = "reading"
+        else:
+            data["Exclusive Shelf"] = "to-read"
+
+        return data