Merge branch 'main' into search-refactor
This commit is contained in:
@ -8,6 +8,7 @@ from requests.exceptions import RequestException
|
||||
|
||||
from bookwyrm import activitypub, models, settings
|
||||
from .connector_manager import load_more_data, ConnectorException
|
||||
from .format_mappings import format_mappings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -41,7 +42,7 @@ class AbstractMinimalConnector(ABC):
|
||||
params["min_confidence"] = min_confidence
|
||||
|
||||
data = self.get_search_data(
|
||||
"%s%s" % (self.search_url, query),
|
||||
f"{self.search_url}{query}",
|
||||
params=params,
|
||||
timeout=timeout,
|
||||
)
|
||||
@ -55,7 +56,7 @@ class AbstractMinimalConnector(ABC):
|
||||
"""isbn search"""
|
||||
params = {}
|
||||
data = self.get_search_data(
|
||||
"%s%s" % (self.isbn_search_url, query),
|
||||
f"{self.isbn_search_url}{query}",
|
||||
params=params,
|
||||
)
|
||||
results = []
|
||||
@ -129,7 +130,7 @@ class AbstractConnector(AbstractMinimalConnector):
|
||||
work_data = data
|
||||
|
||||
if not work_data or not edition_data:
|
||||
raise ConnectorException("Unable to load book data: %s" % remote_id)
|
||||
raise ConnectorException(f"Unable to load book data: {remote_id}")
|
||||
|
||||
with transaction.atomic():
|
||||
# create activitypub object
|
||||
@ -220,9 +221,7 @@ def get_data(url, params=None, timeout=10):
|
||||
"""wrapper for request.get"""
|
||||
# check if the url is blocked
|
||||
if models.FederatedServer.is_blocked(url):
|
||||
raise ConnectorException(
|
||||
"Attempting to load data from blocked url: {:s}".format(url)
|
||||
)
|
||||
raise ConnectorException(f"Attempting to load data from blocked url: {url}")
|
||||
|
||||
try:
|
||||
resp = requests.get(
|
||||
@ -286,3 +285,25 @@ class Mapping:
|
||||
return self.formatter(value)
|
||||
except: # pylint: disable=bare-except
|
||||
return None
|
||||
|
||||
|
||||
def infer_physical_format(format_text):
    """try to figure out what the standardized format is from the free value

    The text is lowercased and looked up in ``format_mappings``. An exact key
    match wins; otherwise the first key (in the table's insertion order) that
    occurs anywhere inside the text is used.

    Returns the mapped standardized format name, or ``None`` when
    ``format_text`` is ``None`` or no key matches.
    """
    if format_text is None:
        # nothing to infer from; bail out instead of failing on .lower()
        return None

    format_text = format_text.lower()
    if format_text in format_mappings:
        # try a direct match
        return format_mappings[format_text]

    # failing that, try substring: stop at the first key contained in the text
    return next(
        (value for key, value in format_mappings.items() if key in format_text),
        None,
    )
|
||||
|
||||
|
||||
def unique_physical_format(format_text):
    """only store the format if it isn't directly in the format mappings

    Returns the lowercased text when it is not an exact key of
    ``format_mappings``; returns ``None`` when it is an exact key (saving it
    would be redundant) or when ``format_text`` is ``None``.
    """
    if format_text is None:
        # no free-text value to keep; bail out instead of failing on .lower()
        return None

    format_text = format_text.lower()
    if format_text in format_mappings:
        # try a direct match, so saving this would be redundant
        return None
    return format_text
|
||||
|
@ -100,10 +100,10 @@ def get_or_create_connector(remote_id):
|
||||
connector_info = models.Connector.objects.create(
|
||||
identifier=identifier,
|
||||
connector_file="bookwyrm_connector",
|
||||
base_url="https://%s" % identifier,
|
||||
books_url="https://%s/book" % identifier,
|
||||
covers_url="https://%s/images/covers" % identifier,
|
||||
search_url="https://%s/search?q=" % identifier,
|
||||
base_url=f"https://{identifier}",
|
||||
books_url=f"https://{identifier}/book",
|
||||
covers_url=f"https://{identifier}/images/covers",
|
||||
search_url=f"https://{identifier}/search?q=",
|
||||
priority=2,
|
||||
)
|
||||
|
||||
@ -122,7 +122,7 @@ def load_more_data(connector_id, book_id):
|
||||
def load_connector(connector_info):
|
||||
"""instantiate the connector class"""
|
||||
connector = importlib.import_module(
|
||||
"bookwyrm.connectors.%s" % connector_info.connector_file
|
||||
f"bookwyrm.connectors.{connector_info.connector_file}"
|
||||
)
|
||||
return connector.Connector(connector_info.identifier)
|
||||
|
||||
@ -132,4 +132,4 @@ def load_connector(connector_info):
|
||||
def create_connector(sender, instance, created, *args, **kwargs):
|
||||
"""create a connector to an external bookwyrm server"""
|
||||
if instance.application_type == "bookwyrm":
|
||||
get_or_create_connector("https://{:s}".format(instance.server_name))
|
||||
get_or_create_connector(f"https://{instance.server_name}")
|
||||
|
43
bookwyrm/connectors/format_mappings.py
Normal file
43
bookwyrm/connectors/format_mappings.py
Normal file
@ -0,0 +1,43 @@
|
||||
""" comparing a free text format to the standardized one """
|
||||
# Lookup table from lowercase free-text fragments to standardized format names.
# Keys are matched against lowercased input, so they must stay lowercase.
# NOTE(review): keys such as "peperback", "hardcocer" and "hardover" look like
# deliberate catches for misspellings in source data -- confirm before removing.
format_mappings = {
    # paperback-like bindings
    "paperback": "Paperback",
    "soft": "Paperback",
    "pamphlet": "Paperback",
    "peperback": "Paperback",
    "tapa blanda": "Paperback",
    "turtleback": "Paperback",
    "pocket": "Paperback",
    "spiral": "Paperback",
    "ring": "Paperback",
    "平装": "Paperback",
    "简装": "Paperback",
    # hardcover-like bindings
    "hardcover": "Hardcover",
    "hardcocer": "Hardcover",
    "hardover": "Hardcover",
    "hardback": "Hardcover",
    "library": "Hardcover",
    "tapa dura": "Hardcover",
    "leather": "Hardcover",
    "clothbound": "Hardcover",
    "精装": "Hardcover",
    # electronic text formats
    "ebook": "EBook",
    "e-book": "EBook",
    "digital": "EBook",
    "computer file": "EBook",
    "epub": "EBook",
    "online": "EBook",
    "pdf": "EBook",
    "elektronische": "EBook",
    "electronic": "EBook",
    # Kindle is an e-book format, not an audio one
    "kindle": "EBook",
    # audio formats
    "audiobook": "AudiobookFormat",
    "audio": "AudiobookFormat",
    "cd": "AudiobookFormat",
    "dvd": "AudiobookFormat",
    "mp3": "AudiobookFormat",
    "cassette": "AudiobookFormat",
    "talking": "AudiobookFormat",
    "sound": "AudiobookFormat",
    # comics and graphic novels
    "comic": "GraphicNovel",
    "graphic": "GraphicNovel",
}
|
@ -9,7 +9,7 @@ from .connector_manager import ConnectorException
|
||||
|
||||
|
||||
class Connector(AbstractConnector):
|
||||
"""instantiate a connector for OL"""
|
||||
"""instantiate a connector for inventaire"""
|
||||
|
||||
def __init__(self, identifier):
|
||||
super().__init__(identifier)
|
||||
@ -60,7 +60,7 @@ class Connector(AbstractConnector):
|
||||
|
||||
def get_remote_id(self, value):
|
||||
"""convert an id/uri into a url"""
|
||||
return "{:s}?action=by-uris&uris={:s}".format(self.books_url, value)
|
||||
return f"{self.books_url}?action=by-uris&uris={value}"
|
||||
|
||||
def get_book_data(self, remote_id):
|
||||
data = get_data(remote_id)
|
||||
@ -88,11 +88,7 @@ class Connector(AbstractConnector):
|
||||
|
||||
def format_search_result(self, search_result):
|
||||
images = search_result.get("image")
|
||||
cover = (
|
||||
"{:s}/img/entities/{:s}".format(self.covers_url, images[0])
|
||||
if images
|
||||
else None
|
||||
)
|
||||
cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None
|
||||
# a deeply messy translation of inventaire's scores
|
||||
confidence = float(search_result.get("_score", 0.1))
|
||||
confidence = 0.1 if confidence < 150 else 0.999
|
||||
@ -100,9 +96,7 @@ class Connector(AbstractConnector):
|
||||
title=search_result.get("label"),
|
||||
key=self.get_remote_id(search_result.get("uri")),
|
||||
author=search_result.get("description"),
|
||||
view_link="{:s}/entity/{:s}".format(
|
||||
self.base_url, search_result.get("uri")
|
||||
),
|
||||
view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
|
||||
cover=cover,
|
||||
confidence=confidence,
|
||||
connector=self,
|
||||
@ -124,9 +118,7 @@ class Connector(AbstractConnector):
|
||||
title=title[0],
|
||||
key=self.get_remote_id(search_result.get("uri")),
|
||||
author=search_result.get("description"),
|
||||
view_link="{:s}/entity/{:s}".format(
|
||||
self.base_url, search_result.get("uri")
|
||||
),
|
||||
view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
|
||||
cover=self.get_cover_url(search_result.get("image")),
|
||||
connector=self,
|
||||
)
|
||||
@ -136,11 +128,7 @@ class Connector(AbstractConnector):
|
||||
|
||||
def load_edition_data(self, work_uri):
|
||||
"""get a list of editions for a work"""
|
||||
url = (
|
||||
"{:s}?action=reverse-claims&property=wdt:P629&value={:s}&sort=true".format(
|
||||
self.books_url, work_uri
|
||||
)
|
||||
)
|
||||
url = f"{self.books_url}?action=reverse-claims&property=wdt:P629&value={work_uri}&sort=true"
|
||||
return get_data(url)
|
||||
|
||||
def get_edition_from_work_data(self, data):
|
||||
@ -196,7 +184,7 @@ class Connector(AbstractConnector):
|
||||
# cover may or may not be an absolute url already
|
||||
if re.match(r"^http", cover_id):
|
||||
return cover_id
|
||||
return "%s%s" % (self.covers_url, cover_id)
|
||||
return f"{self.covers_url}{cover_id}"
|
||||
|
||||
def resolve_keys(self, keys):
|
||||
"""cool, it's "wd:Q3156592" now what the heck does that mean"""
|
||||
@ -214,9 +202,7 @@ class Connector(AbstractConnector):
|
||||
link = links.get("enwiki")
|
||||
if not link:
|
||||
return ""
|
||||
url = "{:s}/api/data?action=wp-extract&lang=en&title={:s}".format(
|
||||
self.base_url, link
|
||||
)
|
||||
url = f"{self.base_url}/api/data?action=wp-extract&lang=en&title={link}"
|
||||
try:
|
||||
data = get_data(url)
|
||||
except ConnectorException:
|
||||
|
@ -4,7 +4,7 @@ import re
|
||||
from bookwyrm import models
|
||||
from bookwyrm.book_search import SearchResult
|
||||
from .abstract_connector import AbstractConnector, Mapping
|
||||
from .abstract_connector import get_data
|
||||
from .abstract_connector import get_data, infer_physical_format, unique_physical_format
|
||||
from .connector_manager import ConnectorException
|
||||
from .openlibrary_languages import languages
|
||||
|
||||
@ -44,7 +44,16 @@ class Connector(AbstractConnector):
|
||||
),
|
||||
Mapping("publishedDate", remote_field="publish_date"),
|
||||
Mapping("pages", remote_field="number_of_pages"),
|
||||
Mapping("physicalFormat", remote_field="physical_format"),
|
||||
Mapping(
|
||||
"physicalFormat",
|
||||
remote_field="physical_format",
|
||||
formatter=infer_physical_format,
|
||||
),
|
||||
Mapping(
|
||||
"physicalFormatDetail",
|
||||
remote_field="physical_format",
|
||||
formatter=unique_physical_format,
|
||||
),
|
||||
Mapping("publishers"),
|
||||
]
|
||||
|
||||
@ -72,7 +81,7 @@ class Connector(AbstractConnector):
|
||||
key = data["key"]
|
||||
except KeyError:
|
||||
raise ConnectorException("Invalid book data")
|
||||
return "%s%s" % (self.books_url, key)
|
||||
return f"{self.books_url}{key}"
|
||||
|
||||
def is_work_data(self, data):
|
||||
return bool(re.match(r"^[\/\w]+OL\d+W$", data["key"]))
|
||||
@ -82,7 +91,7 @@ class Connector(AbstractConnector):
|
||||
key = data["key"]
|
||||
except KeyError:
|
||||
raise ConnectorException("Invalid book data")
|
||||
url = "%s%s/editions" % (self.books_url, key)
|
||||
url = f"{self.books_url}{key}/editions"
|
||||
data = self.get_book_data(url)
|
||||
edition = pick_default_edition(data["entries"])
|
||||
if not edition:
|
||||
@ -94,7 +103,7 @@ class Connector(AbstractConnector):
|
||||
key = data["works"][0]["key"]
|
||||
except (IndexError, KeyError):
|
||||
raise ConnectorException("No work found for edition")
|
||||
url = "%s%s" % (self.books_url, key)
|
||||
url = f"{self.books_url}{key}"
|
||||
return self.get_book_data(url)
|
||||
|
||||
def get_authors_from_data(self, data):
|
||||
@ -103,7 +112,7 @@ class Connector(AbstractConnector):
|
||||
author_blob = author_blob.get("author", author_blob)
|
||||
# this id is "/authors/OL1234567A"
|
||||
author_id = author_blob["key"]
|
||||
url = "%s%s" % (self.base_url, author_id)
|
||||
url = f"{self.base_url}{author_id}"
|
||||
author = self.get_or_create_author(url)
|
||||
if not author:
|
||||
continue
|
||||
@ -114,8 +123,8 @@ class Connector(AbstractConnector):
|
||||
if not cover_blob:
|
||||
return None
|
||||
cover_id = cover_blob[0]
|
||||
image_name = "%s-%s.jpg" % (cover_id, size)
|
||||
return "%s/b/id/%s" % (self.covers_url, image_name)
|
||||
image_name = f"{cover_id}-{size}.jpg"
|
||||
return f"{self.covers_url}/b/id/{image_name}"
|
||||
|
||||
def parse_search_data(self, data):
|
||||
return data.get("docs")
|
||||
@ -153,7 +162,7 @@ class Connector(AbstractConnector):
|
||||
|
||||
def load_edition_data(self, olkey):
|
||||
"""query openlibrary for editions of a work"""
|
||||
url = "%s/works/%s/editions" % (self.books_url, olkey)
|
||||
url = f"{self.books_url}/works/{olkey}/editions"
|
||||
return self.get_book_data(url)
|
||||
|
||||
def expand_book_data(self, book):
|
||||
|
Reference in New Issue
Block a user