Use generalized mappings to handle import
goodreads_import.py
@@ -7,10 +7,3 @@ class GoodreadsImporter(Importer):
     For a more complete example of overriding see librarything_import.py"""

     service = "Goodreads"
-
-    def parse_fields(self, entry):
-        """handle the specific fields in goodreads csvs"""
-        entry.update({"import_source": self.service})
-        # add missing 'Date Started' field
-        entry.update({"Date Started": None})
-        return entry
importer.py
@@ -1,5 +1,6 @@
 """ handle reading a csv from an external service, defaults are from Goodreads """
 import csv
+from dataclasses import dataclass
 import logging

 from django.utils import timezone
@@ -18,30 +19,59 @@ class Importer:
     service = "Unknown"
     delimiter = ","
     encoding = "UTF-8"
     mandatory_fields = ["Title", "Author"]

+    # these are from Goodreads
+    row_mappings_guesses = {
+        "id": ["id", "book id"],
+        "title": ["title"],
+        "authors": ["author", "authors", "primary author"],
+        "isbn_13": ["isbn13", "isbn"],
+        "isbn_10": ["isbn10", "isbn"],
+        "shelf": ["shelf", "exclusive shelf", "read status"],
+        "review_name": [],
+        "review_body": ["my review"],
+        "rating": ["my rating", "rating", "star rating"],
+        "date_added": ["date added", "entry date", "added"],
+        "date_started": ["date started", "started"],
+        "date_finished": ["date finished", "last date read", "date read", "finished"],
+    }
+
     def create_job(self, user, csv_file, include_reviews, privacy):
         """check over a csv and creates a database entry for the job"""
+        csv_reader = csv.DictReader(csv_file, delimiter=self.delimiter)
+        rows = enumerate(list(csv_reader))
         job = ImportJob.objects.create(
-            user=user, include_reviews=include_reviews, privacy=privacy
+            user=user,
+            include_reviews=include_reviews,
+            privacy=privacy,
+            mappings=self.create_row_mappings(csv_reader.fieldnames),
         )
-        for index, entry in enumerate(
-            list(csv.DictReader(csv_file, delimiter=self.delimiter))
-        ):
-            if not all(x in entry for x in self.mandatory_fields):
-                raise ValueError("Author and title must be in data.")
-            entry = self.parse_fields(entry)
-            self.save_item(job, index, entry)
+
+        for index, entry in rows:
+            print(index, entry)
+            self.create_item(job, index, entry)
         return job

-    def save_item(self, job, index, data):  # pylint: disable=no-self-use
-        """creates and saves an import item"""
-        ImportItem(job=job, index=index, data=data).save()
+    def create_row_mappings(self, headers):
+        """guess what the headers mean"""
+        mappings = {}
+        for (key, guesses) in self.row_mappings_guesses.items():
+            value = [h for h in headers if h.lower() in guesses]
+            value = value[0] if len(value) else None
+            if value:
+                headers.remove(value)
+            mappings[key] = value
+        return mappings

-    def parse_fields(self, entry):
-        """updates csv data with additional info"""
-        entry.update({"import_source": self.service})
-        return entry
+    def create_item(self, job, index, data):
+        """creates and saves an import item"""
+        print(data)
+        normalized = self.normalize_row(data, job.mappings)
+        ImportItem(job=job, index=index, data=data, normalized_data=normalized).save()
+
+    def normalize_row(self, entry, mappings):  # pylint: disable=no-self-use
+        """use the dataclass to create the formatted row of data"""
+        return {k: entry.get(v) for k, v in mappings.items()}

     def create_retry_job(self, user, original_job, items):
         """retry items that didn't import"""
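Taken together, row_mappings_guesses, create_row_mappings, and normalize_row replace the per-service parse_fields overrides: headers are matched case-insensitively against the guess lists, and each header can satisfy at most one field. A minimal sketch of that matching on a Goodreads-style export (the sample headers, sample row, and trimmed guesses dict below are illustrative, not part of the commit):

# Guess-matching as in create_row_mappings, run on sample Goodreads headers.
row_mappings_guesses = {
    "id": ["id", "book id"],
    "title": ["title"],
    "authors": ["author", "authors", "primary author"],
    "isbn_13": ["isbn13", "isbn"],
    "rating": ["my rating", "rating", "star rating"],
    "date_finished": ["date finished", "last date read", "date read", "finished"],
}
headers = ["Book Id", "Title", "Author", "ISBN13", "My Rating", "Date Read"]

mappings = {}
for key, guesses in row_mappings_guesses.items():
    value = [h for h in headers if h.lower() in guesses]
    value = value[0] if value else None
    if value:
        headers.remove(value)  # a header can satisfy only one field
    mappings[key] = value
# mappings == {"id": "Book Id", "title": "Title", "authors": "Author",
#              "isbn_13": "ISBN13", "rating": "My Rating",
#              "date_finished": "Date Read"}

# normalize_row then keys the raw row by those guessed mappings:
row = {"Book Id": "1", "Title": "Kindred", "Author": "Octavia E. Butler",
       "ISBN13": "9780807083697", "My Rating": "5", "Date Read": "2021/01/15"}
normalized = {k: row.get(v) for k, v in mappings.items()}

Because the guessed mappings are stored on the ImportJob, the same raw row can be re-normalized later, which the retry path below relies on.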
@@ -49,10 +79,13 @@ class Importer:
             user=user,
             include_reviews=original_job.include_reviews,
             privacy=original_job.privacy,
+            # TODO: allow users to adjust mappings
+            mappings=original_job.mappings,
             retry=True,
         )
         for item in items:
-            self.save_item(job, item.index, item.data)
+            # this will re-normalize the raw data
+            self.create_item(job, item.index, item.data)
         return job

     def start_import(self, job):
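Keeping the raw row on ImportItem and the mappings on the job is what makes a retry a re-normalization rather than a replay of stale parsed data. A tiny illustration of that property (values made up); note that a field whose mapping is None normalizes to None, since dict.get(None) simply returns the default:

raw = {"Title": "Parable of the Sower", "Author": "Octavia E. Butler"}
mappings = {"title": "Title", "authors": "Author", "isbn_13": None}
normalized = {k: raw.get(v) for k, v in mappings.items()}
assert normalized == {
    "title": "Parable of the Sower",
    "authors": "Octavia E. Butler",
    "isbn_13": None,
}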
@@ -156,3 +189,23 @@ def handle_imported_book(source, user, item, include_reviews, privacy):
         )
         # only broadcast this review to other bookwyrm instances
         review.save(software="bookwyrm")
+
+
+@dataclass
+class ImportEntry:
+    """data extracted from a line in a csv"""
+
+    title: str
+    authors: str = None
+    isbn_13: str = None
+    isbn_10: str = None
+    shelf: str = None
+    review_name: str = None
+    review_rating: float = None
+    review_body: str = None
+    review_cw: str = None
+    rating: float = None
+    date_added: str = None
+    date_started: str = None
+    date_finished: str = None
+    import_source: str = "Unknown"
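The new ImportEntry dataclass is declared here but not yet consumed by the importer. A hedged sketch of how a normalized row could hydrate it, assuming the class above is importable; note that normalize_row also emits an "id" key the dataclass does not declare, so unknown keys must be filtered first:

from dataclasses import fields

normalized = {"id": "1", "title": "Kindred", "authors": "Octavia E. Butler",
              "isbn_13": "9780807083697", "shelf": "read"}
known = {f.name for f in fields(ImportEntry)}  # declared field names only
entry = ImportEntry(**{k: v for k, v in normalized.items() if k in known})
# ImportEntry(title='Kindred', authors='Octavia E. Butler', ...)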
librarything_import.py
@@ -1,7 +1,4 @@
-""" handle reading a csv from librarything """
-import re
-import math
-
+""" handle reading a tsv from librarything """
 from . import Importer


@@ -11,32 +8,3 @@ class LibrarythingImporter(Importer):
     service = "LibraryThing"
     delimiter = "\t"
     encoding = "ISO-8859-1"
-    # mandatory_fields : fields matching the book title and author
-    mandatory_fields = ["Title", "Primary Author"]
-
-    def parse_fields(self, entry):
-        """custom parsing for librarything"""
-        data = {}
-        data["import_source"] = self.service
-        data["Book Id"] = entry["Book Id"]
-        data["Title"] = entry["Title"]
-        data["Author"] = entry["Primary Author"]
-        data["ISBN13"] = entry["ISBN"]
-        data["My Review"] = entry["Review"]
-        if entry["Rating"]:
-            data["My Rating"] = math.ceil(float(entry["Rating"]))
-        else:
-            data["My Rating"] = ""
-        data["Date Added"] = re.sub(r"\[|\]", "", entry["Entry Date"])
-        data["Date Started"] = re.sub(r"\[|\]", "", entry["Date Started"])
-        data["Date Read"] = re.sub(r"\[|\]", "", entry["Date Read"])
-
-        data["Exclusive Shelf"] = None
-        if data["Date Read"]:
-            data["Exclusive Shelf"] = "read"
-        elif data["Date Started"]:
-            data["Exclusive Shelf"] = "reading"
-        else:
-            data["Exclusive Shelf"] = "to-read"
-
-        return data
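The generic guesses cover most of what the deleted LibraryThing parser mapped by hand ("Primary Author" -> authors, "Entry Date" -> date_added, "Date Read" -> date_finished), but the value munging it performed is not header mapping and disappears with it. Restated as a standalone sketch for reference (the helper name is hypothetical):

import math
import re

def infer_shelf(date_started, date_read):
    """shelf logic the deleted parse_fields applied to LibraryThing rows"""
    if date_read:
        return "read"
    if date_started:
        return "reading"
    return "to-read"

# the other removed value fix-ups, in miniature:
rating = math.ceil(float("3.5"))                   # half-stars round up -> 4
date_added = re.sub(r"\[|\]", "", "[2021-01-15]")  # strip bracket wrapping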
storygraph_import.py
@@ -1,6 +1,4 @@
-""" handle reading a csv from librarything """
-import re
-
+""" handle reading a csv from storygraph"""
 from . import Importer


@@ -8,26 +6,3 @@ class StorygraphImporter(Importer):
     """csv downloads from Storygraph"""

     service = "Storygraph"
-    # mandatory_fields : fields matching the book title and author
-    mandatory_fields = ["Title"]
-
-    def parse_fields(self, entry):
-        """custom parsing for storygraph"""
-        data = {}
-        data["import_source"] = self.service
-        data["Title"] = entry["Title"]
-        data["Author"] = entry["Authors"] if "Authors" in entry else entry["Author"]
-        data["ISBN13"] = entry["ISBN"]
-        data["My Review"] = entry["Review"]
-        if entry["Star Rating"]:
-            data["My Rating"] = float(entry["Star Rating"])
-        else:
-            data["My Rating"] = ""
-
-        data["Date Added"] = re.sub(r"[/]", "-", entry["Date Added"])
-        data["Date Read"] = re.sub(r"[/]", "-", entry["Last Date Read"])
-
-        data["Exclusive Shelf"] = (
-            {"read": "read", "currently-reading": "reading", "to-read": "to-read"}
-        ).get(entry["Read Status"], None)
-        return data
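Likewise for Storygraph: the "Read Status" header resolves to shelf through the generic guesses, but the deleted code also translated shelf values and date separators, which header mapping alone does not. A minimal restatement (the constant name is hypothetical):

import re

READ_STATUS_TO_SHELF = {"read": "read", "currently-reading": "reading", "to-read": "to-read"}
shelf = READ_STATUS_TO_SHELF.get("currently-reading")  # -> "reading"
date_read = re.sub(r"[/]", "-", "2021/01/15")          # -> "2021-01-15"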