Merge pull request #827 from mouse-reeve/import

Cleans up import code
This commit is contained in:
Mouse Reeve 2021-03-30 10:01:23 -07:00 committed by GitHub
commit d021666f2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 135 additions and 93 deletions

View File

@ -1,14 +0,0 @@
""" handle reading a csv from goodreads """
from bookwyrm.importer import Importer
# GoodReads is the default importer, thus Importer follows its structure. For a more complete example of overriding see librarything_import.py
class GoodreadsImporter(Importer):
    """CSV importer for GoodReads exports.

    The generic Importer is modelled on the GoodReads layout, so only a
    couple of fields have to be filled in here.
    """

    service = "GoodReads"

    def parse_fields(self, data):
        """Tag a csv row with its source service and a 'Date Started' field.

        The row is modified in place and returned.
        """
        # 'Date Started' is missing from the goodreads data, so it is set to None
        data.update({"import_source": self.service, "Date Started": None})
        return data

View File

@ -0,0 +1,5 @@
""" import classes """
from .importer import Importer
from .goodreads_import import GoodreadsImporter
from .librarything_import import LibrarythingImporter

View File

@ -0,0 +1,16 @@
""" handle reading a csv from goodreads """
from . import Importer
class GoodreadsImporter(Importer):
    """Importer for GoodReads csv exports.

    GoodReads is the default import source, so the generic Importer already
    follows its structure; librarything_import.py shows a more complete
    example of overriding.
    """

    service = "GoodReads"

    def parse_fields(self, entry):
        """ annotate one goodreads csv row in place and hand it back """
        entry["import_source"] = self.service
        # the 'Date Started' field is missing from goodreads rows; add it empty
        entry["Date Started"] = None
        return entry

View File

@ -10,6 +10,8 @@ logger = logging.getLogger(__name__)
class Importer:
""" Generic class for csv data import from an outside service """
service = "Unknown"
delimiter = ","
encoding = "UTF-8"
@ -29,10 +31,12 @@ class Importer:
self.save_item(job, index, entry)
return job
def save_item(self, job, index, data):
def save_item(self, job, index, data): # pylint: disable=no-self-use
""" creates and saves an import item """
ImportItem(job=job, index=index, data=data).save()
def parse_fields(self, entry):
""" updates csv data with additional info """
entry.update({"import_source": self.service})
return entry

View File

@ -1,35 +1,35 @@
""" handle reading a csv from librarything """
import csv
import re
import math
from bookwyrm import models
from bookwyrm.models import ImportItem
from bookwyrm.importer import Importer
from . import Importer
class LibrarythingImporter(Importer):
""" csv downloads from librarything """
service = "LibraryThing"
delimiter = "\t"
encoding = "ISO-8859-1"
# mandatory_fields : fields matching the book title and author
mandatory_fields = ["Title", "Primary Author"]
def parse_fields(self, initial):
def parse_fields(self, entry):
""" custom parsing for librarything """
data = {}
data["import_source"] = self.service
data["Book Id"] = initial["Book Id"]
data["Title"] = initial["Title"]
data["Author"] = initial["Primary Author"]
data["ISBN13"] = initial["ISBN"]
data["My Review"] = initial["Review"]
if initial["Rating"]:
data["My Rating"] = math.ceil(float(initial["Rating"]))
data["Book Id"] = entry["Book Id"]
data["Title"] = entry["Title"]
data["Author"] = entry["Primary Author"]
data["ISBN13"] = entry["ISBN"]
data["My Review"] = entry["Review"]
if entry["Rating"]:
data["My Rating"] = math.ceil(float(entry["Rating"]))
else:
data["My Rating"] = ""
data["Date Added"] = re.sub("\[|\]", "", initial["Entry Date"])
data["Date Started"] = re.sub("\[|\]", "", initial["Date Started"])
data["Date Read"] = re.sub("\[|\]", "", initial["Date Read"])
data["Date Added"] = re.sub(r"\[|\]", "", entry["Entry Date"])
data["Date Started"] = re.sub(r"\[|\]", "", entry["Date Started"])
data["Date Read"] = re.sub(r"\[|\]", "", entry["Date Read"])
data["Exclusive Shelf"] = None
if data["Date Read"]:

View File

@ -7,36 +7,43 @@
{% block content %}
<div class="block">
<h1 class="title">{% trans "Import Books" %}</h1>
<form name="import" action="/import" method="post" enctype="multipart/form-data">
<form class="box" name="import" action="/import" method="post" enctype="multipart/form-data">
{% csrf_token %}
<label class="label" for="source">
<p>{% trans "Data source" %}</p>
<div class="select {{ class }}">
<select name="source" id="source">
<option value="GoodReads" {% if current == 'GoodReads' %}selected{% endif %}>
GoodReads
</option>
<option value="LibraryThing" {% if current == 'LibraryThing' %}selected{% endif %}>
LibraryThing
</option>
</select>
<div class="columns">
<div class="column is-half">
<label class="label" for="source">
{% trans "Data source:" %}
</label>
<div class="select block">
<select name="source" id="source">
<option value="GoodReads" {% if current == 'GoodReads' %}selected{% endif %}>
GoodReads (CSV)
</option>
<option value="LibraryThing" {% if current == 'LibraryThing' %}selected{% endif %}>
LibraryThing (TSV)
</option>
</select>
</div>
<div class="field">
    {# the "for" value must match the id Django gives the csv_file widget (id_csv_file) #}
    <label class="label" for="id_csv_file">{% trans "Data file:" %}</label>
    {{ import_form.csv_file }}
</div>
</div>
</label>
<div class="field">
{{ import_form.as_p }}
<div class="column is-half">
<div class="field">
<label class="label">
<input type="checkbox" name="include_reviews" checked> {% trans "Include reviews" %}
</label>
</div>
<div class="field">
<label class="label">
<p>{% trans "Privacy setting for imported reviews:" %}</p>
{% include 'snippets/privacy_select.html' with no_label=True %}
</label>
</div>
</div>
<div class="field">
<label class="label">
<input type="checkbox" name="include_reviews" checked> {% trans "Include reviews" %}
</label>
</div>
<div class="field">
<label class="label">
<p>{% trans "Privacy setting for imported reviews:" %}</p>
{% include 'snippets/privacy_select.html' with no_label=True %}
</label>
</div>
<button class="button is-primary" type="submit">{% trans "Import" %}</button>
</form>

View File

@ -0,0 +1 @@
from . import *

View File

@ -7,8 +7,9 @@ from unittest.mock import patch
from django.test import TestCase
import responses
from bookwyrm import models, importer
from bookwyrm.goodreads_import import GoodreadsImporter
from bookwyrm import models
from bookwyrm.importers import GoodreadsImporter
from bookwyrm.importers.importer import import_data, handle_imported_book
from bookwyrm.settings import DOMAIN
@ -18,7 +19,7 @@ class GoodreadsImport(TestCase):
def setUp(self):
""" use a test csv """
self.importer = GoodreadsImporter()
datafile = pathlib.Path(__file__).parent.joinpath("data/goodreads.csv")
datafile = pathlib.Path(__file__).parent.joinpath("../data/goodreads.csv")
self.csv = open(datafile, "r", encoding=self.importer.encoding)
self.user = models.User.objects.create_user(
"mouse", "mouse@mouse.mouse", "password", local=True
@ -81,7 +82,7 @@ class GoodreadsImport(TestCase):
import_job = self.importer.create_job(self.user, self.csv, False, "unlisted")
MockTask = namedtuple("Task", ("id"))
mock_task = MockTask(7)
with patch("bookwyrm.importer.import_data.delay") as start:
with patch("bookwyrm.importers.importer.import_data.delay") as start:
start.return_value = mock_task
self.importer.start_import(import_job)
import_job.refresh_from_db()
@ -97,8 +98,8 @@ class GoodreadsImport(TestCase):
"bookwyrm.models.import_job.ImportItem.get_book_from_isbn"
) as resolve:
resolve.return_value = book
with patch("bookwyrm.importer.handle_imported_book"):
importer.import_data(self.importer.service, import_job.id)
with patch("bookwyrm.importers.importer.handle_imported_book"):
import_data(self.importer.service, import_job.id)
import_item = models.ImportItem.objects.get(job=import_job, index=0)
self.assertEqual(import_item.book.id, book.id)
@ -109,7 +110,7 @@ class GoodreadsImport(TestCase):
self.assertIsNone(shelf.books.first())
import_job = models.ImportJob.objects.create(user=self.user)
datafile = pathlib.Path(__file__).parent.joinpath("data/goodreads.csv")
datafile = pathlib.Path(__file__).parent.joinpath("../data/goodreads.csv")
csv_file = open(datafile, "r")
for index, entry in enumerate(list(csv.DictReader(csv_file))):
entry = self.importer.parse_fields(entry)
@ -119,7 +120,7 @@ class GoodreadsImport(TestCase):
break
with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
importer.handle_imported_book(
handle_imported_book(
self.importer.service, self.user, import_item, False, "public"
)
@ -143,7 +144,7 @@ class GoodreadsImport(TestCase):
models.ShelfBook.objects.create(shelf=shelf, user=self.user, book=self.book)
import_job = models.ImportJob.objects.create(user=self.user)
datafile = pathlib.Path(__file__).parent.joinpath("data/goodreads.csv")
datafile = pathlib.Path(__file__).parent.joinpath("../data/goodreads.csv")
csv_file = open(datafile, "r")
for index, entry in enumerate(list(csv.DictReader(csv_file))):
entry = self.importer.parse_fields(entry)
@ -153,7 +154,7 @@ class GoodreadsImport(TestCase):
break
with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
importer.handle_imported_book(
handle_imported_book(
self.importer.service, self.user, import_item, False, "public"
)
@ -173,7 +174,7 @@ class GoodreadsImport(TestCase):
""" re-importing books """
shelf = self.user.shelf_set.filter(identifier="read").first()
import_job = models.ImportJob.objects.create(user=self.user)
datafile = pathlib.Path(__file__).parent.joinpath("data/goodreads.csv")
datafile = pathlib.Path(__file__).parent.joinpath("../data/goodreads.csv")
csv_file = open(datafile, "r")
for index, entry in enumerate(list(csv.DictReader(csv_file))):
entry = self.importer.parse_fields(entry)
@ -183,10 +184,10 @@ class GoodreadsImport(TestCase):
break
with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
importer.handle_imported_book(
handle_imported_book(
self.importer.service, self.user, import_item, False, "public"
)
importer.handle_imported_book(
handle_imported_book(
self.importer.service, self.user, import_item, False, "public"
)
@ -207,7 +208,7 @@ class GoodreadsImport(TestCase):
def test_handle_imported_book_review(self, _):
""" goodreads review import """
import_job = models.ImportJob.objects.create(user=self.user)
datafile = pathlib.Path(__file__).parent.joinpath("data/goodreads.csv")
datafile = pathlib.Path(__file__).parent.joinpath("../data/goodreads.csv")
csv_file = open(datafile, "r")
entry = list(csv.DictReader(csv_file))[2]
entry = self.importer.parse_fields(entry)
@ -216,7 +217,7 @@ class GoodreadsImport(TestCase):
)
with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
importer.handle_imported_book(
handle_imported_book(
self.importer.service, self.user, import_item, True, "unlisted"
)
review = models.Review.objects.get(book=self.book, user=self.user)
@ -230,7 +231,7 @@ class GoodreadsImport(TestCase):
def test_handle_imported_book_reviews_disabled(self):
""" goodreads review import """
import_job = models.ImportJob.objects.create(user=self.user)
datafile = pathlib.Path(__file__).parent.joinpath("data/goodreads.csv")
datafile = pathlib.Path(__file__).parent.joinpath("../data/goodreads.csv")
csv_file = open(datafile, "r")
entry = list(csv.DictReader(csv_file))[2]
entry = self.importer.parse_fields(entry)
@ -239,7 +240,7 @@ class GoodreadsImport(TestCase):
)
with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
importer.handle_imported_book(
handle_imported_book(
self.importer.service, self.user, import_item, False, "unlisted"
)
self.assertFalse(

View File

@ -6,8 +6,9 @@ from unittest.mock import patch
from django.test import TestCase
import responses
from bookwyrm import models, importer
from bookwyrm.librarything_import import LibrarythingImporter
from bookwyrm import models
from bookwyrm.importers import LibrarythingImporter
from bookwyrm.importers.importer import import_data, handle_imported_book
from bookwyrm.settings import DOMAIN
@ -17,7 +18,7 @@ class LibrarythingImport(TestCase):
def setUp(self):
""" use a test tsv """
self.importer = LibrarythingImporter()
datafile = pathlib.Path(__file__).parent.joinpath("data/librarything.tsv")
datafile = pathlib.Path(__file__).parent.joinpath("../data/librarything.tsv")
# Librarything generates latin encoded exports...
self.csv = open(datafile, "r", encoding=self.importer.encoding)
@ -87,8 +88,8 @@ class LibrarythingImport(TestCase):
"bookwyrm.models.import_job.ImportItem.get_book_from_isbn"
) as resolve:
resolve.return_value = book
with patch("bookwyrm.importer.handle_imported_book"):
importer.import_data(self.importer.service, import_job.id)
with patch("bookwyrm.importers.importer.handle_imported_book"):
import_data(self.importer.service, import_job.id)
import_item = models.ImportItem.objects.get(job=import_job, index=0)
self.assertEqual(import_item.book.id, book.id)
@ -99,7 +100,7 @@ class LibrarythingImport(TestCase):
self.assertIsNone(shelf.books.first())
import_job = models.ImportJob.objects.create(user=self.user)
datafile = pathlib.Path(__file__).parent.joinpath("data/librarything.tsv")
datafile = pathlib.Path(__file__).parent.joinpath("../data/librarything.tsv")
csv_file = open(datafile, "r", encoding=self.importer.encoding)
for index, entry in enumerate(
list(csv.DictReader(csv_file, delimiter=self.importer.delimiter))
@ -111,7 +112,7 @@ class LibrarythingImport(TestCase):
break
with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
importer.handle_imported_book(
handle_imported_book(
self.importer.service, self.user, import_item, False, "public"
)
@ -135,7 +136,7 @@ class LibrarythingImport(TestCase):
models.ShelfBook.objects.create(shelf=shelf, user=self.user, book=self.book)
import_job = models.ImportJob.objects.create(user=self.user)
datafile = pathlib.Path(__file__).parent.joinpath("data/librarything.tsv")
datafile = pathlib.Path(__file__).parent.joinpath("../data/librarything.tsv")
csv_file = open(datafile, "r", encoding=self.importer.encoding)
for index, entry in enumerate(
list(csv.DictReader(csv_file, delimiter=self.importer.delimiter))
@ -147,7 +148,7 @@ class LibrarythingImport(TestCase):
break
with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
importer.handle_imported_book(
handle_imported_book(
self.importer.service, self.user, import_item, False, "public"
)
@ -167,7 +168,7 @@ class LibrarythingImport(TestCase):
""" re-importing books """
shelf = self.user.shelf_set.filter(identifier="read").first()
import_job = models.ImportJob.objects.create(user=self.user)
datafile = pathlib.Path(__file__).parent.joinpath("data/librarything.tsv")
datafile = pathlib.Path(__file__).parent.joinpath("../data/librarything.tsv")
csv_file = open(datafile, "r", encoding=self.importer.encoding)
for index, entry in enumerate(
list(csv.DictReader(csv_file, delimiter=self.importer.delimiter))
@ -179,10 +180,10 @@ class LibrarythingImport(TestCase):
break
with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
importer.handle_imported_book(
handle_imported_book(
self.importer.service, self.user, import_item, False, "public"
)
importer.handle_imported_book(
handle_imported_book(
self.importer.service, self.user, import_item, False, "public"
)
@ -203,7 +204,7 @@ class LibrarythingImport(TestCase):
def test_handle_imported_book_review(self, _):
""" librarything review import """
import_job = models.ImportJob.objects.create(user=self.user)
datafile = pathlib.Path(__file__).parent.joinpath("data/librarything.tsv")
datafile = pathlib.Path(__file__).parent.joinpath("../data/librarything.tsv")
csv_file = open(datafile, "r", encoding=self.importer.encoding)
entry = list(csv.DictReader(csv_file, delimiter=self.importer.delimiter))[0]
entry = self.importer.parse_fields(entry)
@ -212,7 +213,7 @@ class LibrarythingImport(TestCase):
)
with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
importer.handle_imported_book(
handle_imported_book(
self.importer.service, self.user, import_item, True, "unlisted"
)
review = models.Review.objects.get(book=self.book, user=self.user)
@ -226,7 +227,7 @@ class LibrarythingImport(TestCase):
def test_handle_imported_book_reviews_disabled(self):
""" librarything review import """
import_job = models.ImportJob.objects.create(user=self.user)
datafile = pathlib.Path(__file__).parent.joinpath("data/librarything.tsv")
datafile = pathlib.Path(__file__).parent.joinpath("../data/librarything.tsv")
csv_file = open(datafile, "r", encoding=self.importer.encoding)
entry = list(csv.DictReader(csv_file, delimiter=self.importer.delimiter))[2]
entry = self.importer.parse_fields(entry)
@ -235,7 +236,7 @@ class LibrarythingImport(TestCase):
)
with patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay"):
importer.handle_imported_book(
handle_imported_book(
self.importer.service, self.user, import_item, False, "unlisted"
)
self.assertFalse(

View File

@ -45,3 +45,22 @@ class ImportViews(TestCase):
self.assertIsInstance(result, TemplateResponse)
result.render()
self.assertEqual(result.status_code, 200)
def test_retry_import(self):
""" retry failed items """
view = views.ImportStatus.as_view()
import_job = models.ImportJob.objects.create(
user=self.local_user, privacy="unlisted"
)
request = self.factory.post("")
request.user = self.local_user
with patch("bookwyrm.importers.Importer.start_import"):
view(request, import_job.id)
self.assertEqual(models.ImportJob.objects.count(), 2)
retry_job = models.ImportJob.objects.last()
self.assertTrue(retry_job.retry)
self.assertEqual(retry_job.user, self.local_user)
self.assertEqual(retry_job.privacy, "unlisted")

View File

@ -9,7 +9,8 @@ from django.template.response import TemplateResponse
from django.utils.decorators import method_decorator
from django.views import View
from bookwyrm import forms, goodreads_import, librarything_import, models
from bookwyrm import forms, models
from bookwyrm.importers import Importer, LibrarythingImporter, GoodreadsImporter
from bookwyrm.tasks import app
# pylint: disable= no-self-use
@ -40,10 +41,10 @@ class Import(View):
importer = None
if source == "LibraryThing":
importer = librarything_import.LibrarythingImporter()
importer = LibrarythingImporter()
else:
# Default : GoodReads
importer = goodreads_import.GoodreadsImporter()
importer = GoodreadsImporter()
try:
job = importer.create_job(
@ -89,10 +90,11 @@ class ImportStatus(View):
for item in request.POST.getlist("import_item"):
items.append(get_object_or_404(models.ImportItem, id=item))
job = goodreads_import.create_retry_job(
importer = Importer()
job = importer.create_retry_job(
request.user,
job,
items,
)
goodreads_import.start_import(job)
importer.start_import(job)
return redirect("/import/%d" % job.id)