Use generalized mappings to handle import

This commit is contained in:
Mouse Reeve
2021-11-10 16:49:54 -08:00
parent 0736c7e160
commit 4ccd9fc633
7 changed files with 152 additions and 178 deletions

View File

@ -1,5 +1,6 @@
""" handle reading a csv from an external service, defaults are from Goodreads """
import csv
from dataclasses import dataclass
import logging
from typing import Optional

from django.utils import timezone
@ -18,30 +19,59 @@ class Importer:
# Importer-wide defaults. The module docstring says the defaults follow
# Goodreads; presumably other services override them -- TODO confirm.
# ``service`` is the value parse_fields writes into each row as "import_source".
service = "Unknown"
# field separator handed to csv.DictReader when the upload is parsed
delimiter = ","
# NOTE(review): not read anywhere in the visible code -- presumably used by
# the caller that opens the uploaded file; confirm.
encoding = "UTF-8"
# header names a row was required to contain.
# NOTE(review): confirm this is still enforced now that mappings are guessed.
mandatory_fields = ["Title", "Author"]
# Candidate column names for each normalized field, taken from Goodreads
# exports. Guesses are compared against the lower-cased CSV header, so every
# entry here must be lowercase. A header is claimed by at most one field
# (create_row_mappings consumes it once matched), which matters for guesses
# that are listed under more than one field, such as "isbn".
row_mappings_guesses = {
"id": ["id", "book id"],
"title": ["title"],
"authors": ["author", "authors", "primary author"],
"isbn_13": ["isbn13", "isbn"],
"isbn_10": ["isbn10", "isbn"],
"shelf": ["shelf", "exclusive shelf", "read status"],
# no known header for this field, so it never matches a column
"review_name": [],
"review_body": ["my review"],
"rating": ["my rating", "rating", "star rating"],
"date_added": ["date added", "entry date", "added"],
"date_started": ["date started", "started"],
"date_finished": ["date finished", "last date read", "date read", "finished"],
}
def create_job(self, user, csv_file, include_reviews, privacy):
    """Parse an uploaded csv and create the import job plus one item per row.

    ``csv_file`` is read exactly once with ``csv.DictReader`` using this
    importer's ``delimiter``. The header row is turned into field mappings
    (see ``create_row_mappings``) which are stored on the job, and every
    data row is saved through ``create_item`` with its zero-based index.

    Returns the newly created ``ImportJob``.
    """
    csv_reader = csv.DictReader(csv_file, delimiter=self.delimiter)
    # Materialize the rows up front: the underlying file can only be
    # consumed once, so it must not be wrapped in a second reader later.
    rows = list(csv_reader)
    job = ImportJob.objects.create(
        user=user,
        include_reviews=include_reviews,
        privacy=privacy,
        mappings=self.create_row_mappings(csv_reader.fieldnames),
    )
    for index, entry in enumerate(rows):
        self.create_item(job, index, entry)
    return job
def save_item(self, job, index, data):  # pylint: disable=no-self-use
    """Store one raw row as an ImportItem belonging to ``job``."""
    item = ImportItem(job=job, index=index, data=data)
    item.save()
def create_row_mappings(self, headers):
    """Guess which csv column holds each normalized field.

    ``headers`` is the list of column names from the csv header row; it may
    be ``None`` or empty (``csv.DictReader.fieldnames`` is ``None`` for an
    empty file), in which case every field maps to ``None``.

    Returns a dict with one entry per key of ``row_mappings_guesses``. The
    value is the original (un-lowercased) header whose lower-cased form
    appears in that key's guesses, or ``None`` when no header matches. Each
    header is assigned to at most one field, in the order the fields are
    declared.
    """
    # Work on a copy so the caller's list (possibly a reader's own
    # ``fieldnames``) is not modified while headers are being claimed.
    remaining = list(headers or [])
    mappings = {}
    for key, guesses in self.row_mappings_guesses.items():
        match = next((h for h in remaining if h.lower() in guesses), None)
        if match is not None:
            # a claimed header can't be reused by a later field
            remaining.remove(match)
        mappings[key] = match
    return mappings
def parse_fields(self, entry):
    """Tag a csv row with the service it was imported from.

    The row is modified in place and also returned.
    """
    entry["import_source"] = self.service
    return entry
def create_item(self, job, index, data):
    """Create and save one import item for a csv row.

    ``data`` is the raw row and is stored unchanged; alongside it the row is
    normalized with the job's header mappings (``job.mappings``) and stored
    as ``normalized_data``. ``index`` is the row's position in the job.
    """
    # The raw row is deliberately kept next to the normalized copy so a
    # retry can re-normalize it from the original data.
    normalized = self.normalize_row(data, job.mappings)
    ImportItem(job=job, index=index, data=data, normalized_data=normalized).save()
def normalize_row(self, entry, mappings):  # pylint: disable=no-self-use
    """Translate a raw csv row into normalized field names.

    ``mappings`` maps each normalized field to the csv header that holds it,
    or to ``None`` when no column was found. Returns a dict with the same
    keys as ``mappings``; a field is ``None`` when it is unmapped or when
    the row has no value under the mapped header.
    """
    # An unmapped field must not be looked up in the row: csv.DictReader
    # stores surplus cells of an over-long row under the key ``None``, so
    # ``entry.get(None)`` could return that overflow list instead of None.
    return {
        field: (entry.get(header) if header is not None else None)
        for field, header in mappings.items()
    }
def create_retry_job(self, user, original_job, items):
"""retry items that didn't import"""
@ -49,10 +79,13 @@ class Importer:
user=user,
include_reviews=original_job.include_reviews,
privacy=original_job.privacy,
# TODO: allow users to adjust mappings
mappings=original_job.mappings,
retry=True,
)
for item in items:
self.save_item(job, item.index, item.data)
# this will re-normalize the raw data
self.create_item(job, item.index, item.data)
return job
def start_import(self, job):
@ -156,3 +189,23 @@ def handle_imported_book(source, user, item, include_reviews, privacy):
)
# only broadcast this review to other bookwyrm instances
review.save(software="bookwyrm")
@dataclass
class ImportEntry:
    """data extracted from a line in a csv

    Only ``title`` is required; every other field defaults to ``None``
    (or ``"Unknown"`` for ``import_source``) when it is not supplied.
    """

    # book identification
    title: str
    authors: Optional[str] = None
    isbn_13: Optional[str] = None
    isbn_10: Optional[str] = None
    # shelving and review details
    shelf: Optional[str] = None
    review_name: Optional[str] = None
    review_rating: Optional[float] = None
    review_body: Optional[str] = None
    review_cw: Optional[str] = None
    rating: Optional[float] = None
    # dates are kept as the strings found in the csv, not parsed here
    date_added: Optional[str] = None
    date_started: Optional[str] = None
    date_finished: Optional[str] = None
    # same default as the importer's ``service`` name
    import_source: str = "Unknown"