Merge branch 'main' into search-refactor

This commit is contained in:
Mouse Reeve
2021-09-30 10:40:57 -07:00
218 changed files with 8892 additions and 6942 deletions

View File

@ -8,6 +8,7 @@ from requests.exceptions import RequestException
from bookwyrm import activitypub, models, settings
from .connector_manager import load_more_data, ConnectorException
from .format_mappings import format_mappings
logger = logging.getLogger(__name__)
@ -41,7 +42,7 @@ class AbstractMinimalConnector(ABC):
params["min_confidence"] = min_confidence
data = self.get_search_data(
"%s%s" % (self.search_url, query),
f"{self.search_url}{query}",
params=params,
timeout=timeout,
)
@ -55,7 +56,7 @@ class AbstractMinimalConnector(ABC):
"""isbn search"""
params = {}
data = self.get_search_data(
"%s%s" % (self.isbn_search_url, query),
f"{self.isbn_search_url}{query}",
params=params,
)
results = []
@ -129,7 +130,7 @@ class AbstractConnector(AbstractMinimalConnector):
work_data = data
if not work_data or not edition_data:
raise ConnectorException("Unable to load book data: %s" % remote_id)
raise ConnectorException(f"Unable to load book data: {remote_id}")
with transaction.atomic():
# create activitypub object
@ -220,9 +221,7 @@ def get_data(url, params=None, timeout=10):
"""wrapper for request.get"""
# check if the url is blocked
if models.FederatedServer.is_blocked(url):
raise ConnectorException(
"Attempting to load data from blocked url: {:s}".format(url)
)
raise ConnectorException(f"Attempting to load data from blocked url: {url}")
try:
resp = requests.get(
@ -286,3 +285,25 @@ class Mapping:
return self.formatter(value)
except: # pylint: disable=bare-except
return None
def infer_physical_format(format_text):
    """Map a free-text format description onto a standardized format name.

    The text is lowercased and looked up in ``format_mappings``. An exact key
    match wins; otherwise the value of the first key (in mapping order) that
    occurs inside the text is used. Returns None when nothing matches.
    """
    normalized = format_text.lower()
    # exact match on the whole description
    if normalized in format_mappings:
        return format_mappings[normalized]
    # otherwise fall back to the first mapping key contained in the text
    return next(
        (
            standard
            for fragment, standard in format_mappings.items()
            if fragment in normalized
        ),
        None,
    )
def unique_physical_format(format_text):
    """Return the lowercased format text unless it is itself a mapping key.

    A description that is exactly a key of ``format_mappings`` yields None,
    because saving it would be redundant; any other description is returned
    lowercased.
    """
    normalized = format_text.lower()
    is_known_key = normalized in format_mappings
    return None if is_known_key else normalized

View File

@ -100,10 +100,10 @@ def get_or_create_connector(remote_id):
connector_info = models.Connector.objects.create(
identifier=identifier,
connector_file="bookwyrm_connector",
base_url="https://%s" % identifier,
books_url="https://%s/book" % identifier,
covers_url="https://%s/images/covers" % identifier,
search_url="https://%s/search?q=" % identifier,
base_url=f"https://{identifier}",
books_url=f"https://{identifier}/book",
covers_url=f"https://{identifier}/images/covers",
search_url=f"https://{identifier}/search?q=",
priority=2,
)
@ -122,7 +122,7 @@ def load_more_data(connector_id, book_id):
def load_connector(connector_info):
    """Instantiate the connector class described by ``connector_info``.

    Imports the module ``bookwyrm.connectors.<connector_file>`` and returns
    that module's ``Connector`` built with ``connector_info.identifier``.
    """
    # only the f-string form of the module path is kept; the superseded
    # %-formatted line next to it was leftover from the change
    module = importlib.import_module(
        f"bookwyrm.connectors.{connector_info.connector_file}"
    )
    return module.Connector(connector_info.identifier)
@ -132,4 +132,4 @@ def load_connector(connector_info):
def create_connector(sender, instance, created, *args, **kwargs):
    """Create a connector to an external bookwyrm server.

    Does nothing unless ``instance.application_type`` is ``"bookwyrm"``; in
    that case a connector is requested for ``https://<instance.server_name>``.
    """
    if instance.application_type == "bookwyrm":
        # a single call: the older ``.format`` spelling of the same call is dropped
        get_or_create_connector(f"https://{instance.server_name}")

View File

@ -0,0 +1,43 @@
""" comparing a free text format to the standardized one """
# Lowercase free-text fragment -> standardized format name.
# Insertion order matters to any caller that scans the keys for a substring
# and takes the first hit, so every entry keeps its original position.
# NOTE(review): misspelled keys such as "peperback", "hardcocer" and "hardover"
# look like deliberate catches for typos in incoming data -- confirm before
# "correcting" them.
format_mappings = {
    "paperback": "Paperback",
    "soft": "Paperback",
    "pamphlet": "Paperback",
    "peperback": "Paperback",
    "tapa blanda": "Paperback",
    "turtleback": "Paperback",
    "pocket": "Paperback",
    "spiral": "Paperback",
    "ring": "Paperback",
    "平装": "Paperback",
    "简装": "Paperback",
    "hardcover": "Hardcover",
    "hardcocer": "Hardcover",
    "hardover": "Hardcover",
    "hardback": "Hardcover",
    "library": "Hardcover",
    "tapa dura": "Hardcover",
    "leather": "Hardcover",
    "clothbound": "Hardcover",
    "精装": "Hardcover",
    "ebook": "EBook",
    "e-book": "EBook",
    "digital": "EBook",
    "computer file": "EBook",
    "epub": "EBook",
    "online": "EBook",
    "pdf": "EBook",
    "elektronische": "EBook",
    "electronic": "EBook",
    "audiobook": "AudiobookFormat",
    "audio": "AudiobookFormat",
    "cd": "AudiobookFormat",
    "dvd": "AudiobookFormat",
    "mp3": "AudiobookFormat",
    "cassette": "AudiobookFormat",
    # Kindle editions are e-books, not audio recordings
    "kindle": "EBook",
    "talking": "AudiobookFormat",
    "sound": "AudiobookFormat",
    "comic": "GraphicNovel",
    "graphic": "GraphicNovel",
}

View File

@ -9,7 +9,7 @@ from .connector_manager import ConnectorException
class Connector(AbstractConnector):
"""instantiate a connector for OL"""
"""instantiate a connector for inventaire"""
def __init__(self, identifier):
super().__init__(identifier)
@ -60,7 +60,7 @@ class Connector(AbstractConnector):
def get_remote_id(self, value):
    """Convert an id/uri into a url.

    The result is ``self.books_url`` followed by a ``by-uris`` query for
    ``value``.
    """
    # single return: the older ``.format`` return made this one unreachable
    return f"{self.books_url}?action=by-uris&uris={value}"
def get_book_data(self, remote_id):
data = get_data(remote_id)
@ -88,11 +88,7 @@ class Connector(AbstractConnector):
def format_search_result(self, search_result):
images = search_result.get("image")
cover = (
"{:s}/img/entities/{:s}".format(self.covers_url, images[0])
if images
else None
)
cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None
# a deeply messy translation of inventaire's scores
confidence = float(search_result.get("_score", 0.1))
confidence = 0.1 if confidence < 150 else 0.999
@ -100,9 +96,7 @@ class Connector(AbstractConnector):
title=search_result.get("label"),
key=self.get_remote_id(search_result.get("uri")),
author=search_result.get("description"),
view_link="{:s}/entity/{:s}".format(
self.base_url, search_result.get("uri")
),
view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
cover=cover,
confidence=confidence,
connector=self,
@ -124,9 +118,7 @@ class Connector(AbstractConnector):
title=title[0],
key=self.get_remote_id(search_result.get("uri")),
author=search_result.get("description"),
view_link="{:s}/entity/{:s}".format(
self.base_url, search_result.get("uri")
),
view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
cover=self.get_cover_url(search_result.get("image")),
connector=self,
)
@ -136,11 +128,7 @@ class Connector(AbstractConnector):
def load_edition_data(self, work_uri):
    """Get a list of editions for a work.

    Builds a ``reverse-claims`` query on ``self.books_url`` for property
    ``wdt:P629`` with ``work_uri`` as the value, and returns whatever
    ``get_data`` yields for that url.
    """
    # url is built once; the earlier ``.format`` assignment was a dead store
    url = (
        f"{self.books_url}?action=reverse-claims"
        f"&property=wdt:P629&value={work_uri}&sort=true"
    )
    return get_data(url)
def get_edition_from_work_data(self, data):
@ -196,7 +184,7 @@ class Connector(AbstractConnector):
# cover may or may not be an absolute url already
if re.match(r"^http", cover_id):
return cover_id
return "%s%s" % (self.covers_url, cover_id)
return f"{self.covers_url}{cover_id}"
def resolve_keys(self, keys):
"""cool, it's "wd:Q3156592" now what the heck does that mean"""
@ -214,9 +202,7 @@ class Connector(AbstractConnector):
link = links.get("enwiki")
if not link:
return ""
url = "{:s}/api/data?action=wp-extract&lang=en&title={:s}".format(
self.base_url, link
)
url = f"{self.base_url}/api/data?action=wp-extract&lang=en&title={link}"
try:
data = get_data(url)
except ConnectorException:

View File

@ -4,7 +4,7 @@ import re
from bookwyrm import models
from bookwyrm.book_search import SearchResult
from .abstract_connector import AbstractConnector, Mapping
from .abstract_connector import get_data
from .abstract_connector import get_data, infer_physical_format, unique_physical_format
from .connector_manager import ConnectorException
from .openlibrary_languages import languages
@ -44,7 +44,16 @@ class Connector(AbstractConnector):
),
Mapping("publishedDate", remote_field="publish_date"),
Mapping("pages", remote_field="number_of_pages"),
Mapping("physicalFormat", remote_field="physical_format"),
Mapping(
"physicalFormat",
remote_field="physical_format",
formatter=infer_physical_format,
),
Mapping(
"physicalFormatDetail",
remote_field="physical_format",
formatter=unique_physical_format,
),
Mapping("publishers"),
]
@ -72,7 +81,7 @@ class Connector(AbstractConnector):
key = data["key"]
except KeyError:
raise ConnectorException("Invalid book data")
return "%s%s" % (self.books_url, key)
return f"{self.books_url}{key}"
def is_work_data(self, data):
    """Tell whether ``data["key"]`` has the shape of a work key.

    The key must be one or more slash/word characters followed by ``OL``,
    digits and a final ``W``.
    """
    work_key = re.match(r"^[\/\w]+OL\d+W$", data["key"])
    return work_key is not None
@ -82,7 +91,7 @@ class Connector(AbstractConnector):
key = data["key"]
except KeyError:
raise ConnectorException("Invalid book data")
url = "%s%s/editions" % (self.books_url, key)
url = f"{self.books_url}{key}/editions"
data = self.get_book_data(url)
edition = pick_default_edition(data["entries"])
if not edition:
@ -94,7 +103,7 @@ class Connector(AbstractConnector):
key = data["works"][0]["key"]
except (IndexError, KeyError):
raise ConnectorException("No work found for edition")
url = "%s%s" % (self.books_url, key)
url = f"{self.books_url}{key}"
return self.get_book_data(url)
def get_authors_from_data(self, data):
@ -103,7 +112,7 @@ class Connector(AbstractConnector):
author_blob = author_blob.get("author", author_blob)
# this id is "/authors/OL1234567A"
author_id = author_blob["key"]
url = "%s%s" % (self.base_url, author_id)
url = f"{self.base_url}{author_id}"
author = self.get_or_create_author(url)
if not author:
continue
@ -114,8 +123,8 @@ class Connector(AbstractConnector):
if not cover_blob:
return None
cover_id = cover_blob[0]
image_name = "%s-%s.jpg" % (cover_id, size)
return "%s/b/id/%s" % (self.covers_url, image_name)
image_name = f"{cover_id}-{size}.jpg"
return f"{self.covers_url}/b/id/{image_name}"
def parse_search_data(self, data):
    """Pull the ``"docs"`` entry out of a search response (None if absent)."""
    docs = data.get("docs")
    return docs
@ -153,7 +162,7 @@ class Connector(AbstractConnector):
def load_edition_data(self, olkey):
    """Query openlibrary for editions of a work.

    Requests ``<books_url>/works/<olkey>/editions`` through
    ``self.get_book_data`` and returns its result.
    """
    # url is built once; the earlier %-formatted assignment was a dead store
    url = f"{self.books_url}/works/{olkey}/editions"
    return self.get_book_data(url)
def expand_book_data(self, book):