Infer format in openlibrary import
This commit is contained in:
parent
47706b5353
commit
123b23728f
|
@ -9,6 +9,7 @@ from requests.exceptions import RequestException
|
||||||
|
|
||||||
from bookwyrm import activitypub, models, settings
|
from bookwyrm import activitypub, models, settings
|
||||||
from .connector_manager import load_more_data, ConnectorException
|
from .connector_manager import load_more_data, ConnectorException
|
||||||
|
from .format_mappings import format_mappings
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -312,3 +313,16 @@ class Mapping:
|
||||||
return self.formatter(value)
|
return self.formatter(value)
|
||||||
except: # pylint: disable=bare-except
|
except: # pylint: disable=bare-except
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def infer_physical_format(format_text):
    """Map a free-text format description onto a standardized format name.

    The text is lower-cased and looked up in ``format_mappings``. When there
    is no exact key, the first mapping key (in the mapping's own order) that
    occurs as a substring of the text decides the result.

    Returns the standardized format string, or None when the text is
    missing/empty or no mapping key matches.
    """
    if not format_text:
        # nothing to infer from (None or ""); also avoids calling .lower()
        # on None
        return None
    format_text = format_text.lower()
    # try a direct match
    if format_text in format_mappings:
        return format_mappings[format_text]
    # failing that, try substring: the first key contained in the text wins
    return next(
        (v for k, v in format_mappings.items() if k in format_text),
        None,
    )
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
""" comparing a free text format to the standardized one """
|
||||||
|
format_mappings = {
|
||||||
|
"paperback": "Paperback",
|
||||||
|
"soft": "Paperback",
|
||||||
|
"pamphlet": "Paperback",
|
||||||
|
"peperback": "Paperback",
|
||||||
|
"tapa blanda": "Paperback",
|
||||||
|
"turtleback": "Paperback",
|
||||||
|
"pocket": "Paperback",
|
||||||
|
"spiral": "Paperback",
|
||||||
|
"ring": "Paperback",
|
||||||
|
"平装": "Paperback",
|
||||||
|
"简装": "Paperback",
|
||||||
|
"hardcover": "Hardcover",
|
||||||
|
"hardcocer": "Hardcover",
|
||||||
|
"hardover": "Hardcover",
|
||||||
|
"hardback": "Hardcover",
|
||||||
|
"library": "Hardcover",
|
||||||
|
"tapa dura": "Hardcover",
|
||||||
|
"leather": "Hardcover",
|
||||||
|
"clothbound": "Hardcover",
|
||||||
|
"精装": "Hardcover",
|
||||||
|
"ebook": "EBook",
|
||||||
|
"e-book": "EBook",
|
||||||
|
"digital": "EBook",
|
||||||
|
"computer file": "EBook",
|
||||||
|
"epub": "EBook",
|
||||||
|
"online": "EBook",
|
||||||
|
"pdf": "EBook",
|
||||||
|
"elektronische": "EBook",
|
||||||
|
"electronic": "EBook",
|
||||||
|
"audiobook": "AudiobookFormat",
|
||||||
|
"audio": "AudiobookFormat",
|
||||||
|
"cd": "AudiobookFormat",
|
||||||
|
"dvd": "AudiobookFormat",
|
||||||
|
"mp3": "AudiobookFormat",
|
||||||
|
"cassette": "AudiobookFormat",
|
||||||
|
"kindle": "AudiobookFormat",
|
||||||
|
"talking": "AudiobookFormat",
|
||||||
|
"sound": "AudiobookFormat",
|
||||||
|
"comic": "GraphicNovel",
|
||||||
|
"graphic": "GraphicNovel",
|
||||||
|
}
|
|
@ -3,7 +3,7 @@ import re
|
||||||
|
|
||||||
from bookwyrm import models
|
from bookwyrm import models
|
||||||
from .abstract_connector import AbstractConnector, SearchResult, Mapping
|
from .abstract_connector import AbstractConnector, SearchResult, Mapping
|
||||||
from .abstract_connector import get_data
|
from .abstract_connector import get_data, infer_physical_format
|
||||||
from .connector_manager import ConnectorException
|
from .connector_manager import ConnectorException
|
||||||
from .openlibrary_languages import languages
|
from .openlibrary_languages import languages
|
||||||
|
|
||||||
|
@ -43,7 +43,8 @@ class Connector(AbstractConnector):
|
||||||
),
|
),
|
||||||
Mapping("publishedDate", remote_field="publish_date"),
|
Mapping("publishedDate", remote_field="publish_date"),
|
||||||
Mapping("pages", remote_field="number_of_pages"),
|
Mapping("pages", remote_field="number_of_pages"),
|
||||||
Mapping("physicalFormat", remote_field="physical_format"),
|
Mapping("physicalFormat", remote_field="physical_format", formatter=infer_physical_format),
|
||||||
|
Mapping("physicalFormatDetail", remote_field="physical_format"),
|
||||||
Mapping("publishers"),
|
Mapping("publishers"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
from django.db import migrations
|
from django.db import migrations
|
||||||
import bookwyrm
|
import bookwyrm
|
||||||
|
from bookwyrm.connectors.abstract_connector import infer_physical_format
|
||||||
|
|
||||||
|
|
||||||
def infer_format(app_registry, schema_editor):
|
def infer_format(app_registry, schema_editor):
|
||||||
|
@ -13,59 +14,10 @@ def infer_format(app_registry, schema_editor):
|
||||||
.objects.using(db_alias)
|
.objects.using(db_alias)
|
||||||
.filter(physical_format_detail__isnull=False)
|
.filter(physical_format_detail__isnull=False)
|
||||||
)
|
)
|
||||||
mappings = {
|
|
||||||
"paperback": "Paperback",
|
|
||||||
"soft": "Paperback",
|
|
||||||
"pamphlet": "Paperback",
|
|
||||||
"peperback": "Paperback",
|
|
||||||
"tapa blanda": "Paperback",
|
|
||||||
"turtleback": "Paperback",
|
|
||||||
"pocket": "Paperback",
|
|
||||||
"spiral": "Paperback",
|
|
||||||
"ring": "Paperback",
|
|
||||||
"平装": "Paperback",
|
|
||||||
"简装": "Paperback",
|
|
||||||
"hardcover": "Hardcover",
|
|
||||||
"hardcocer": "Hardcover",
|
|
||||||
"hardover": "Hardcover",
|
|
||||||
"hardback": "Hardcover",
|
|
||||||
"library": "Hardcover",
|
|
||||||
"tapa dura": "Hardcover",
|
|
||||||
"leather": "Hardcover",
|
|
||||||
"clothbound": "Hardcover",
|
|
||||||
"精装": "Hardcover",
|
|
||||||
"ebook": "EBook",
|
|
||||||
"e-book": "EBook",
|
|
||||||
"digital": "EBook",
|
|
||||||
"computer file": "EBook",
|
|
||||||
"epub": "EBook",
|
|
||||||
"online": "EBook",
|
|
||||||
"pdf": "EBook",
|
|
||||||
"elektronische": "EBook",
|
|
||||||
"electronic": "EBook",
|
|
||||||
"audiobook": "AudiobookFormat",
|
|
||||||
"audio": "AudiobookFormat",
|
|
||||||
"cd": "AudiobookFormat",
|
|
||||||
"dvd": "AudiobookFormat",
|
|
||||||
"mp3": "AudiobookFormat",
|
|
||||||
"cassette": "AudiobookFormat",
|
|
||||||
"kindle": "AudiobookFormat",
|
|
||||||
"talking": "AudiobookFormat",
|
|
||||||
"sound": "AudiobookFormat",
|
|
||||||
"comic": "GraphicNovel",
|
|
||||||
"graphic": "GraphicNovel",
|
|
||||||
}
|
|
||||||
for edition in editions:
|
for edition in editions:
|
||||||
free_format = edition.physical_format_detail.lower()
|
free_format = edition.physical_format_detail.lower()
|
||||||
if free_format in mappings:
|
edition.physical_format = infer_physical_format(free_format)
|
||||||
edition.physical_format = mappings[free_format]
|
edition.save()
|
||||||
edition.save()
|
|
||||||
else:
|
|
||||||
matches = [v for k, v in mappings.items() if k in free_format]
|
|
||||||
if not matches:
|
|
||||||
continue
|
|
||||||
edition.physical_format = matches[0]
|
|
||||||
edition.save()
|
|
||||||
|
|
||||||
|
|
||||||
def reverse(app_registry, schema_editor):
|
def reverse(app_registry, schema_editor):
|
||||||
|
|
Loading…
Reference in New Issue