2021-03-08 11:49:10 -05:00
|
|
|
""" using a bookwyrm instance as a source of book data """
|
2021-01-02 18:48:59 -05:00
|
|
|
from functools import reduce
|
|
|
|
import operator
|
|
|
|
|
2021-06-24 13:56:27 -04:00
|
|
|
from django.contrib.postgres.search import SearchRank, SearchQuery
|
2021-06-17 17:46:58 -04:00
|
|
|
from django.db.models import OuterRef, Subquery, F, Q
|
2020-03-28 15:55:53 -04:00
|
|
|
|
2020-09-21 11:10:37 -04:00
|
|
|
from bookwyrm import models
|
2020-04-29 13:57:20 -04:00
|
|
|
from .abstract_connector import AbstractConnector, SearchResult
|
2020-03-28 15:55:53 -04:00
|
|
|
|
|
|
|
|
|
|
|
class Connector(AbstractConnector):
    """instantiate a connector"""

    # pylint: disable=arguments-differ
    def search(self, query, min_confidence=0, raw=False, filters=None):
        """search your local database

        Identifier matches are tried first; the title/author search only runs
        when the identifier lookup finds nothing.

        :param query: the search text; a falsy query returns an empty list
        :param min_confidence: passed to the title/author search as its rank
            threshold
        :param raw: when true, return the matched objects themselves instead
            of formatted ``SearchResult`` entries (and skip the final sort)
        :param filters: optional list of extra positional filter arguments,
            applied to both lookups
        :return: a list of at most 10 results
        """
        filters = filters or []
        if not query:
            return []

        # first, try searching unique identifiers
        results = search_identifiers(query, *filters)
        if not results:
            # then try searching title/author
            results = search_title_author(query, min_confidence, *filters)

        search_results = self._collect_results(results, raw)
        if not raw:
            # formatted results carry a confidence score; best match first
            search_results.sort(key=lambda r: r.confidence, reverse=True)
        return search_results

    def isbn_search(self, query, raw=False):
        """search your local database by isbn

        :param query: the isbn to look for; compared against both the
            ``isbn_10`` and ``isbn_13`` fields. A falsy query returns an
            empty list
        :param raw: when true, return the matched objects themselves instead
            of formatted ``SearchResult`` entries
        :return: a list of at most 10 results
        """
        if not query:
            return []

        results = models.Edition.objects.filter(
            Q(isbn_10=query) | Q(isbn_13=query)
        ).distinct()

        # when there are multiple editions of the same work, pick the default.
        # it would be odd for this to happen.
        default_editions = models.Edition.objects.filter(
            parent_work=OuterRef("parent_work")
        ).order_by("-edition_rank")
        # fall back to every match if none of them is its work's
        # highest-ranked edition
        results = (
            results.annotate(
                default_id=Subquery(default_editions.values("id")[:1])
            ).filter(default_id=F("id"))
            or results
        )

        return self._collect_results(results, raw)

    def _collect_results(self, results, raw, limit=10):
        """gather up to ``limit`` results, formatting each one unless ``raw``"""
        search_results = []
        for result in results:
            search_results.append(
                result if raw else self.format_search_result(result)
            )
            # stop consuming the (possibly lazy) iterable once we have enough
            if len(search_results) >= limit:
                break
        return search_results

    def format_search_result(self, search_result):
        """build a SearchResult from a matched object

        The cover url is prefixed with ``self.covers_url`` when a cover is
        set, and confidence falls back to 1 when the object has no ``rank``
        attribute.
        """
        cover = None
        if search_result.cover:
            cover = "%s%s" % (self.covers_url, search_result.cover)

        return SearchResult(
            title=search_result.title,
            key=search_result.remote_id,
            author=search_result.author_text,
            year=search_result.published_date.year
            if search_result.published_date
            else None,
            connector=self,
            cover=cover,
            confidence=getattr(search_result, "rank", 1),
        )

    def format_isbn_search_result(self, search_result):
        """isbn results are formatted exactly like regular search results"""
        return self.format_search_result(search_result)

    def is_work_data(self, data):
        """no-op in this connector; returns None"""

    def get_edition_from_work_data(self, data):
        """no-op in this connector; returns None"""

    def get_work_from_edition_data(self, data):
        """no-op in this connector; returns None"""

    def get_authors_from_data(self, data):
        """no-op in this connector; returns None"""
        return None

    def parse_isbn_search_data(self, data):
        """it's already in the right format, don't even worry about it"""
        return data

    def parse_search_data(self, data):
        """it's already in the right format, don't even worry about it"""
        return data

    def expand_book_data(self, book):
        """no-op in this connector; returns None"""
|
2021-01-02 18:48:59 -05:00
|
|
|
|
|
|
|
|
2021-04-26 11:02:30 -04:00
|
|
|
def search_identifiers(query, *filters):
    """look the query up against every deduplication field on Edition

    Returns the matching editions; when more than one matches, the set is
    narrowed to editions that are the top-ranked edition of their work, unless
    that would leave nothing.
    """
    # pylint: disable=W0212
    dedupe_names = [
        field.name
        for field in models.Edition._meta.get_fields()
        if hasattr(field, "deduplication_field") and field.deduplication_field
    ]
    # a match on any one of the identifier fields is enough
    any_identifier = reduce(
        operator.or_, [Q(**{name: query}) for name in dedupe_names]
    )
    results = models.Edition.objects.filter(*filters, any_identifier).distinct()

    if results.count() <= 1:
        return results

    # when there are multiple editions of the same work, pick the default.
    # it would be odd for this to happen.
    ranked_siblings = models.Edition.objects.filter(
        parent_work=OuterRef("parent_work")
    ).order_by("-edition_rank")
    top_edition_id = Subquery(ranked_siblings.values("id")[:1])
    defaults_only = results.annotate(default_id=top_edition_id).filter(
        default_id=F("id")
    )

    if defaults_only:
        return defaults_only
    return results
|
2021-01-02 18:48:59 -05:00
|
|
|
|
|
|
|
|
2021-04-26 11:02:30 -04:00
|
|
|
def search_title_author(query, min_confidence, *filters):
    """searches for title and author

    Runs a full-text search against the editions' ``search_vector`` using
    both the "simple" and "english" configurations, keeps matches whose rank
    is greater than ``min_confidence``, and yields one edition per work.
    Works are yielded in descending order of their best match's rank, so a
    caller that only consumes the first few items gets the strongest matches.

    :param query: the search text
    :param min_confidence: rank threshold; only higher-ranked editions match
    :param filters: extra positional filter arguments for the edition query
    :return: a generator of editions, each annotated with ``rank``
    """
    query = SearchQuery(query, config="simple") | SearchQuery(query, config="english")
    results = (
        models.Edition.objects.filter(*filters, search_vector=query)
        .annotate(rank=SearchRank(F("search_vector"), query))
        .filter(rank__gt=min_confidence)
        .order_by("-rank")
    )

    # when there are multiple editions of the same work, pick the closest
    editions_of_work = results.values("parent_work__id").values_list("parent_work__id")

    # filter out multiple editions of the same work. dict.fromkeys drops the
    # duplicates but, unlike a set, keeps the first-seen order, and the rows
    # arrive sorted by rank, so the best-matching work comes first
    for work_id in dict.fromkeys(editions_of_work):
        editions = results.filter(parent_work=work_id)
        best_match = editions.first()
        if best_match is None:
            # nothing left for this work (e.g. rows changed between queries)
            continue

        default = editions.order_by("-edition_rank").first()
        default_rank = default.rank if default else 0
        # if multiple books have the top rank, pick the default edition
        if default_rank == best_match.rank:
            yield default
        else:
            yield best_match
|