diff --git a/bookwyrm/book_search.py b/bookwyrm/book_search.py new file mode 100644 index 00000000..6c89b61f --- /dev/null +++ b/bookwyrm/book_search.py @@ -0,0 +1,156 @@ +""" using a bookwyrm instance as a source of book data """ +from dataclasses import asdict, dataclass +from functools import reduce +import operator + +from django.contrib.postgres.search import SearchRank, SearchQuery +from django.db.models import OuterRef, Subquery, F, Q + +from bookwyrm import models +from bookwyrm.settings import MEDIA_FULL_URL + + +# pylint: disable=arguments-differ +def search(query, min_confidence=0, filters=None, return_first=False): + """search your local database""" + filters = filters or [] + if not query: + return [] + # first, try searching unqiue identifiers + results = search_identifiers(query, *filters, return_first=return_first) + if not results: + # then try searching title/author + results = search_title_author( + query, min_confidence, *filters, return_first=return_first + ) + return results + + +def isbn_search(query): + """search your local database""" + if not query: + return [] + + filters = [{f: query} for f in ["isbn_10", "isbn_13"]] + results = models.Edition.objects.filter( + reduce(operator.or_, (Q(**f) for f in filters)) + ).distinct() + + # when there are multiple editions of the same work, pick the default. + # it would be odd for this to happen. + + default_editions = models.Edition.objects.filter( + parent_work=OuterRef("parent_work") + ).order_by("-edition_rank") + results = ( + results.annotate(default_id=Subquery(default_editions.values("id")[:1])).filter( + default_id=F("id") + ) + or results + ) + return results + + +def format_search_result(search_result): + """convert a book object into a search result object""" + cover = None + if search_result.cover: + cover = f"{MEDIA_FULL_URL}{search_result.cover}" + + return SearchResult( + title=search_result.title, + key=search_result.remote_id, + author=search_result.author_text, + year=search_result.published_date.year + if search_result.published_date + else None, + cover=cover, + confidence=search_result.rank if hasattr(search_result, "rank") else 1, + connector="", + ).json() + + +def search_identifiers(query, *filters, return_first=False): + """tries remote_id, isbn; defined as dedupe fields on the model""" + # pylint: disable=W0212 + or_filters = [ + {f.name: query} + for f in models.Edition._meta.get_fields() + if hasattr(f, "deduplication_field") and f.deduplication_field + ] + results = models.Edition.objects.filter( + *filters, reduce(operator.or_, (Q(**f) for f in or_filters)) + ).distinct() + if results.count() <= 1: + return results + + # when there are multiple editions of the same work, pick the default. + # it would be odd for this to happen. + default_editions = models.Edition.objects.filter( + parent_work=OuterRef("parent_work") + ).order_by("-edition_rank") + results = ( + results.annotate(default_id=Subquery(default_editions.values("id")[:1])).filter( + default_id=F("id") + ) + or results + ) + if return_first: + return results.first() + return results + + +def search_title_author(query, min_confidence, *filters, return_first=False): + """searches for title and author""" + query = SearchQuery(query, config="simple") | SearchQuery(query, config="english") + results = ( + models.Edition.objects.filter(*filters, search_vector=query) + .annotate(rank=SearchRank(F("search_vector"), query)) + .filter(rank__gt=min_confidence) + .order_by("-rank") + ) + + # when there are multiple editions of the same work, pick the closest + editions_of_work = results.values("parent_work__id").values_list("parent_work__id") + + # filter out multiple editions of the same work + list_results = [] + for work_id in set(editions_of_work): + editions = results.filter(parent_work=work_id) + default = editions.order_by("-edition_rank").first() + default_rank = default.rank if default else 0 + # if mutliple books have the top rank, pick the default edition + if default_rank == editions.first().rank: + result = default + else: + result = editions.first() + if return_first: + return result + list_results.append(result) + return list_results + + +@dataclass +class SearchResult: + """standardized search result object""" + + title: str + key: str + connector: object + view_link: str = None + author: str = None + year: str = None + cover: str = None + confidence: int = 1 + + def __repr__(self): + # pylint: disable=consider-using-f-string + return "".format( + self.key, self.title, self.author + ) + + def json(self): + """serialize a connector for json response""" + serialized = asdict(self) + del serialized["connector"] + return serialized diff --git a/bookwyrm/connectors/__init__.py b/bookwyrm/connectors/__init__.py index 689f2701..efbdb166 100644 --- a/bookwyrm/connectors/__init__.py +++ b/bookwyrm/connectors/__init__.py @@ -3,4 +3,4 @@ from .settings import CONNECTORS from .abstract_connector import ConnectorException from .abstract_connector import get_data, get_image -from .connector_manager import search, local_search, first_search_result +from .connector_manager import search, first_search_result diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 2d10331b..c032986d 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -1,6 +1,5 @@ """ functionality outline for a book data connector """ from abc import ABC, abstractmethod -from dataclasses import asdict, dataclass import logging from django.db import transaction @@ -32,7 +31,6 @@ class AbstractMinimalConnector(ABC): "isbn_search_url", "name", "identifier", - "local", ] for field in self_fields: setattr(self, field, getattr(info, field)) @@ -268,32 +266,6 @@ def get_image(url, timeout=10): return resp -@dataclass -class SearchResult: - """standardized search result object""" - - title: str - key: str - connector: object - view_link: str = None - author: str = None - year: str = None - cover: str = None - confidence: int = 1 - - def __repr__(self): - # pylint: disable=consider-using-f-string - return "".format( - self.key, self.title, self.author - ) - - def json(self): - """serialize a connector for json response""" - serialized = asdict(self) - del serialized["connector"] - return serialized - - class Mapping: """associate a local database field with a field in an external dataset""" diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py index 10a633b2..6dcba7c3 100644 --- a/bookwyrm/connectors/bookwyrm_connector.py +++ b/bookwyrm/connectors/bookwyrm_connector.py @@ -1,6 +1,7 @@ """ using another bookwyrm instance as a source of book data """ from bookwyrm import activitypub, models -from .abstract_connector import AbstractMinimalConnector, SearchResult +from bookwyrm.book_search import SearchResult +from .abstract_connector import AbstractMinimalConnector class Connector(AbstractMinimalConnector): diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py index b676e9aa..45530cd6 100644 --- a/bookwyrm/connectors/connector_manager.py +++ b/bookwyrm/connectors/connector_manager.py @@ -10,7 +10,7 @@ from django.db.models import signals from requests import HTTPError -from bookwyrm import models +from bookwyrm import book_search, models from bookwyrm.tasks import app logger = logging.getLogger(__name__) @@ -55,7 +55,7 @@ def search(query, min_confidence=0.1, return_first=False): # if we found anything, return it return result_set[0] - if result_set or connector.local: + if result_set: results.append( { "connector": connector, @@ -71,22 +71,13 @@ def search(query, min_confidence=0.1, return_first=False): return results -def local_search(query, min_confidence=0.1, raw=False, filters=None): - """only look at local search results""" - connector = load_connector(models.Connector.objects.get(local=True)) - return connector.search( - query, min_confidence=min_confidence, raw=raw, filters=filters - ) - - -def isbn_local_search(query, raw=False): - """only look at local search results""" - connector = load_connector(models.Connector.objects.get(local=True)) - return connector.isbn_search(query, raw=raw) - - def first_search_result(query, min_confidence=0.1): """search until you find a result that fits""" + # try local search first + result = book_search.search(query, min_confidence=min_confidence, return_first=True) + if result: + return result + # otherwise, try remote endpoints return search(query, min_confidence=min_confidence, return_first=True) or None diff --git a/bookwyrm/connectors/inventaire.py b/bookwyrm/connectors/inventaire.py index 1bfd2b50..faed5429 100644 --- a/bookwyrm/connectors/inventaire.py +++ b/bookwyrm/connectors/inventaire.py @@ -2,7 +2,8 @@ import re from bookwyrm import models -from .abstract_connector import AbstractConnector, SearchResult, Mapping +from bookwyrm.book_search import SearchResult +from .abstract_connector import AbstractConnector, Mapping from .abstract_connector import get_data from .connector_manager import ConnectorException diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index ef8a7b3d..b8afc7ca 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -2,7 +2,8 @@ import re from bookwyrm import models -from .abstract_connector import AbstractConnector, SearchResult, Mapping +from bookwyrm.book_search import SearchResult +from .abstract_connector import AbstractConnector, Mapping from .abstract_connector import get_data, infer_physical_format, unique_physical_format from .connector_manager import ConnectorException from .openlibrary_languages import languages diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py deleted file mode 100644 index cdb586cb..00000000 --- a/bookwyrm/connectors/self_connector.py +++ /dev/null @@ -1,164 +0,0 @@ -""" using a bookwyrm instance as a source of book data """ -from functools import reduce -import operator - -from django.contrib.postgres.search import SearchRank, SearchQuery -from django.db.models import OuterRef, Subquery, F, Q - -from bookwyrm import models -from .abstract_connector import AbstractConnector, SearchResult - - -class Connector(AbstractConnector): - """instantiate a connector""" - - # pylint: disable=arguments-differ - def search(self, query, min_confidence=0, raw=False, filters=None): - """search your local database""" - filters = filters or [] - if not query: - return [] - # first, try searching unqiue identifiers - results = search_identifiers(query, *filters) - if not results: - # then try searching title/author - results = search_title_author(query, min_confidence, *filters) - search_results = [] - for result in results: - if raw: - search_results.append(result) - else: - search_results.append(self.format_search_result(result)) - if len(search_results) >= 10: - break - if not raw: - search_results.sort(key=lambda r: r.confidence, reverse=True) - return search_results - - def isbn_search(self, query, raw=False): - """search your local database""" - if not query: - return [] - - filters = [{f: query} for f in ["isbn_10", "isbn_13"]] - results = models.Edition.objects.filter( - reduce(operator.or_, (Q(**f) for f in filters)) - ).distinct() - - # when there are multiple editions of the same work, pick the default. - # it would be odd for this to happen. - - default_editions = models.Edition.objects.filter( - parent_work=OuterRef("parent_work") - ).order_by("-edition_rank") - results = ( - results.annotate( - default_id=Subquery(default_editions.values("id")[:1]) - ).filter(default_id=F("id")) - or results - ) - - search_results = [] - for result in results: - if raw: - search_results.append(result) - else: - search_results.append(self.format_search_result(result)) - if len(search_results) >= 10: - break - return search_results - - def format_search_result(self, search_result): - cover = None - if search_result.cover: - cover = f"{self.covers_url}{search_result.cover}" - - return SearchResult( - title=search_result.title, - key=search_result.remote_id, - author=search_result.author_text, - year=search_result.published_date.year - if search_result.published_date - else None, - connector=self, - cover=cover, - confidence=search_result.rank if hasattr(search_result, "rank") else 1, - ) - - def format_isbn_search_result(self, search_result): - return self.format_search_result(search_result) - - def is_work_data(self, data): - pass - - def get_edition_from_work_data(self, data): - pass - - def get_work_from_edition_data(self, data): - pass - - def get_authors_from_data(self, data): - return None - - def parse_isbn_search_data(self, data): - """it's already in the right format, don't even worry about it""" - return data - - def parse_search_data(self, data): - """it's already in the right format, don't even worry about it""" - return data - - def expand_book_data(self, book): - pass - - -def search_identifiers(query, *filters): - """tries remote_id, isbn; defined as dedupe fields on the model""" - # pylint: disable=W0212 - or_filters = [ - {f.name: query} - for f in models.Edition._meta.get_fields() - if hasattr(f, "deduplication_field") and f.deduplication_field - ] - results = models.Edition.objects.filter( - *filters, reduce(operator.or_, (Q(**f) for f in or_filters)) - ).distinct() - if results.count() <= 1: - return results - - # when there are multiple editions of the same work, pick the default. - # it would be odd for this to happen. - default_editions = models.Edition.objects.filter( - parent_work=OuterRef("parent_work") - ).order_by("-edition_rank") - return ( - results.annotate(default_id=Subquery(default_editions.values("id")[:1])).filter( - default_id=F("id") - ) - or results - ) - - -def search_title_author(query, min_confidence, *filters): - """searches for title and author""" - query = SearchQuery(query, config="simple") | SearchQuery(query, config="english") - results = ( - models.Edition.objects.filter(*filters, search_vector=query) - .annotate(rank=SearchRank(F("search_vector"), query)) - .filter(rank__gt=min_confidence) - .order_by("-rank") - ) - - # when there are multiple editions of the same work, pick the closest - editions_of_work = results.values("parent_work__id").values_list("parent_work__id") - - # filter out multiple editions of the same work - for work_id in set(editions_of_work): - editions = results.filter(parent_work=work_id) - default = editions.order_by("-edition_rank").first() - default_rank = default.rank if default else 0 - # if mutliple books have the top rank, pick the default edition - if default_rank == editions.first().rank: - yield default - else: - yield editions.first() diff --git a/bookwyrm/connectors/settings.py b/bookwyrm/connectors/settings.py index 4cc98da7..927e39b2 100644 --- a/bookwyrm/connectors/settings.py +++ b/bookwyrm/connectors/settings.py @@ -1,3 +1,3 @@ """ settings book data connectors """ -CONNECTORS = ["openlibrary", "inventaire", "self_connector", "bookwyrm_connector"] +CONNECTORS = ["openlibrary", "inventaire", "bookwyrm_connector"] diff --git a/bookwyrm/management/commands/initdb.py b/bookwyrm/management/commands/initdb.py index 71ac511a..d0ab648e 100644 --- a/bookwyrm/management/commands/initdb.py +++ b/bookwyrm/management/commands/initdb.py @@ -4,7 +4,6 @@ from django.contrib.auth.models import Group, Permission from django.contrib.contenttypes.models import ContentType from bookwyrm.models import Connector, FederatedServer, SiteSettings, User -from bookwyrm.settings import DOMAIN def init_groups(): @@ -73,19 +72,6 @@ def init_permissions(): def init_connectors(): """access book data sources""" - Connector.objects.create( - identifier=DOMAIN, - name="Local", - local=True, - connector_file="self_connector", - base_url="https://%s" % DOMAIN, - books_url="https://%s/book" % DOMAIN, - covers_url="https://%s/images/" % DOMAIN, - search_url="https://%s/search?q=" % DOMAIN, - isbn_search_url="https://%s/isbn/" % DOMAIN, - priority=1, - ) - Connector.objects.create( identifier="bookwyrm.social", name="BookWyrm dot Social", diff --git a/bookwyrm/migrations/0102_remove_connector_local.py b/bookwyrm/migrations/0102_remove_connector_local.py new file mode 100644 index 00000000..857f0f58 --- /dev/null +++ b/bookwyrm/migrations/0102_remove_connector_local.py @@ -0,0 +1,41 @@ +# Generated by Django 3.2.5 on 2021-09-30 17:46 + +from django.db import migrations +from bookwyrm.settings import DOMAIN + + +def remove_self_connector(app_registry, schema_editor): + """set the new phsyical format field based on existing format data""" + db_alias = schema_editor.connection.alias + app_registry.get_model("bookwyrm", "Connector").objects.using(db_alias).filter( + connector_file="self_connector" + ).delete() + + +def reverse(app_registry, schema_editor): + """doesn't need to do anything""" + db_alias = schema_editor.connection.alias + model = app_registry.get_model("bookwyrm", "Connector") + model.objects.using(db_alias).create( + identifier=DOMAIN, + name="Local", + local=True, + connector_file="self_connector", + base_url=f"https://{DOMAIN}", + books_url=f"https://{DOMAIN}/book", + covers_url=f"https://{DOMAIN}/images/", + search_url=f"https://{DOMAIN}/search?q=", + isbn_search_url=f"https://{DOMAIN}/isbn/", + priority=1, + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0101_auto_20210929_1847"), + ] + + operations = [ + migrations.RunPython(remove_self_connector, reverse), + ] diff --git a/bookwyrm/migrations/0103_remove_connector_local.py b/bookwyrm/migrations/0103_remove_connector_local.py new file mode 100644 index 00000000..788ce5f8 --- /dev/null +++ b/bookwyrm/migrations/0103_remove_connector_local.py @@ -0,0 +1,17 @@ +# Generated by Django 3.2.5 on 2021-09-30 18:03 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0102_remove_connector_local"), + ] + + operations = [ + migrations.RemoveField( + model_name="connector", + name="local", + ), + ] diff --git a/bookwyrm/models/connector.py b/bookwyrm/models/connector.py index 9d2c6aeb..99e73ab3 100644 --- a/bookwyrm/models/connector.py +++ b/bookwyrm/models/connector.py @@ -14,7 +14,6 @@ class Connector(BookWyrmModel): identifier = models.CharField(max_length=255, unique=True) priority = models.IntegerField(default=2) name = models.CharField(max_length=255, null=True, blank=True) - local = models.BooleanField(default=False) connector_file = models.CharField(max_length=255, choices=ConnectorFiles.choices) api_key = models.CharField(max_length=255, null=True, blank=True) active = models.BooleanField(default=True) diff --git a/bookwyrm/templates/search/book.html b/bookwyrm/templates/search/book.html index 39f83732..704f055b 100644 --- a/bookwyrm/templates/search/book.html +++ b/bookwyrm/templates/search/book.html @@ -8,7 +8,24 @@ @@ -43,7 +60,33 @@ diff --git a/bookwyrm/templates/snippets/search_result_text.html b/bookwyrm/templates/snippets/search_result_text.html deleted file mode 100644 index 40fa5a3d..00000000 --- a/bookwyrm/templates/snippets/search_result_text.html +++ /dev/null @@ -1,41 +0,0 @@ -{% load i18n %} -
-
- {% include 'snippets/book_cover.html' with book=result cover_class='is-w-xs is-h-xs' external_path=True %} -
- -
-

- - {{ result.title }} - -

-

- {% if result.author %} - {{ result.author }} - {% endif %} - - {% if result.year %} - ({{ result.year }}) - {% endif %} -

- - {% if remote_result %} -
- {% csrf_token %} - - - - -
- {% endif %} -
-
diff --git a/bookwyrm/tests/connectors/test_abstract_connector.py b/bookwyrm/tests/connectors/test_abstract_connector.py index 8ce4c96b..a453f613 100644 --- a/bookwyrm/tests/connectors/test_abstract_connector.py +++ b/bookwyrm/tests/connectors/test_abstract_connector.py @@ -119,12 +119,10 @@ class AbstractConnector(TestCase): @responses.activate def test_get_or_create_author(self): """load an author""" - self.connector.author_mappings = ( - [ # pylint: disable=attribute-defined-outside-init - Mapping("id"), - Mapping("name"), - ] - ) + self.connector.author_mappings = [ # pylint: disable=attribute-defined-outside-init # pylint: disable=attribute-defined-outside-init + Mapping("id"), + Mapping("name"), + ] responses.add( responses.GET, diff --git a/bookwyrm/tests/connectors/test_abstract_minimal_connector.py b/bookwyrm/tests/connectors/test_abstract_minimal_connector.py index 84629139..a90ce0c7 100644 --- a/bookwyrm/tests/connectors/test_abstract_minimal_connector.py +++ b/bookwyrm/tests/connectors/test_abstract_minimal_connector.py @@ -4,7 +4,7 @@ import responses from bookwyrm import models from bookwyrm.connectors import abstract_connector -from bookwyrm.connectors.abstract_connector import Mapping, SearchResult +from bookwyrm.connectors.abstract_connector import Mapping class AbstractConnector(TestCase): @@ -53,7 +53,6 @@ class AbstractConnector(TestCase): self.assertEqual(connector.isbn_search_url, "https://example.com/isbn?q=") self.assertIsNone(connector.name) self.assertEqual(connector.identifier, "example.com") - self.assertFalse(connector.local) @responses.activate def test_search(self): @@ -94,19 +93,6 @@ class AbstractConnector(TestCase): results = self.test_connector.isbn_search("123456") self.assertEqual(len(results), 10) - def test_search_result(self): - """a class that stores info about a search result""" - result = SearchResult( - title="Title", - key="https://example.com/book/1", - author="Author Name", - year="1850", - connector=self.test_connector, - ) - # there's really not much to test here, it's just a dataclass - self.assertEqual(result.confidence, 1) - self.assertEqual(result.title, "Title") - def test_create_mapping(self): """maps remote fields for book data to bookwyrm activitypub fields""" mapping = Mapping("isbn") diff --git a/bookwyrm/tests/connectors/test_bookwyrm_connector.py b/bookwyrm/tests/connectors/test_bookwyrm_connector.py index 46ea54a9..585080e6 100644 --- a/bookwyrm/tests/connectors/test_bookwyrm_connector.py +++ b/bookwyrm/tests/connectors/test_bookwyrm_connector.py @@ -4,8 +4,8 @@ import pathlib from django.test import TestCase from bookwyrm import models +from bookwyrm.book_search import SearchResult from bookwyrm.connectors.bookwyrm_connector import Connector -from bookwyrm.connectors.abstract_connector import SearchResult class BookWyrmConnector(TestCase): diff --git a/bookwyrm/tests/connectors/test_connector_manager.py b/bookwyrm/tests/connectors/test_connector_manager.py index 67b108dd..c88a8036 100644 --- a/bookwyrm/tests/connectors/test_connector_manager.py +++ b/bookwyrm/tests/connectors/test_connector_manager.py @@ -5,7 +5,6 @@ import responses from bookwyrm import models from bookwyrm.connectors import connector_manager from bookwyrm.connectors.bookwyrm_connector import Connector as BookWyrmConnector -from bookwyrm.connectors.self_connector import Connector as SelfConnector class ConnectorManager(TestCase): @@ -15,28 +14,16 @@ class ConnectorManager(TestCase): """we'll need some books and a connector info entry""" self.work = models.Work.objects.create(title="Example Work") - self.edition = models.Edition.objects.create( + models.Edition.objects.create( title="Example Edition", parent_work=self.work, isbn_10="0000000000" ) self.edition = models.Edition.objects.create( title="Another Edition", parent_work=self.work, isbn_10="1111111111" ) - self.connector = models.Connector.objects.create( - identifier="test_connector", - priority=1, - local=True, - connector_file="self_connector", - base_url="http://test.com/", - books_url="http://test.com/", - covers_url="http://test.com/", - isbn_search_url="http://test.com/isbn/", - ) - self.remote_connector = models.Connector.objects.create( identifier="test_connector_remote", priority=1, - local=False, connector_file="bookwyrm_connector", base_url="http://fake.ciom/", books_url="http://fake.ciom/", @@ -59,23 +46,22 @@ class ConnectorManager(TestCase): def test_get_connectors(self): """load all connectors""" connectors = list(connector_manager.get_connectors()) - self.assertEqual(len(connectors), 2) - self.assertIsInstance(connectors[0], SelfConnector) - self.assertIsInstance(connectors[1], BookWyrmConnector) + self.assertEqual(len(connectors), 1) + self.assertIsInstance(connectors[0], BookWyrmConnector) @responses.activate - def test_search(self): + def test_search_plaintext(self): """search all connectors""" responses.add( responses.GET, "http://fake.ciom/search/Example?min_confidence=0.1", - json={}, + json=[{"title": "Hello", "key": "https://www.example.com/search/1"}], ) results = connector_manager.search("Example") self.assertEqual(len(results), 1) - self.assertIsInstance(results[0]["connector"], SelfConnector) self.assertEqual(len(results[0]["results"]), 1) - self.assertEqual(results[0]["results"][0].title, "Example Edition") + self.assertEqual(results[0]["connector"].identifier, "test_connector_remote") + self.assertEqual(results[0]["results"][0].title, "Hello") def test_search_empty_query(self): """don't panic on empty queries""" @@ -88,19 +74,13 @@ class ConnectorManager(TestCase): responses.add( responses.GET, "http://fake.ciom/isbn/0000000000", - json={}, + json=[{"title": "Hello", "key": "https://www.example.com/search/1"}], ) results = connector_manager.search("0000000000") self.assertEqual(len(results), 1) - self.assertIsInstance(results[0]["connector"], SelfConnector) self.assertEqual(len(results[0]["results"]), 1) - self.assertEqual(results[0]["results"][0].title, "Example Edition") - - def test_local_search(self): - """search only the local database""" - results = connector_manager.local_search("Example") - self.assertEqual(len(results), 1) - self.assertEqual(results[0].title, "Example Edition") + self.assertEqual(results[0]["connector"].identifier, "test_connector_remote") + self.assertEqual(results[0]["results"][0].title, "Hello") def test_first_search_result(self): """only get one search result""" @@ -125,6 +105,5 @@ class ConnectorManager(TestCase): def test_load_connector(self): """load a connector object from the database entry""" - connector = connector_manager.load_connector(self.connector) - self.assertIsInstance(connector, SelfConnector) - self.assertEqual(connector.identifier, "test_connector") + connector = connector_manager.load_connector(self.remote_connector) + self.assertEqual(connector.identifier, "test_connector_remote") diff --git a/bookwyrm/tests/connectors/test_openlibrary_connector.py b/bookwyrm/tests/connectors/test_openlibrary_connector.py index 699b26ed..75a273d6 100644 --- a/bookwyrm/tests/connectors/test_openlibrary_connector.py +++ b/bookwyrm/tests/connectors/test_openlibrary_connector.py @@ -7,11 +7,11 @@ from django.test import TestCase import responses from bookwyrm import models +from bookwyrm.book_search import SearchResult from bookwyrm.connectors.openlibrary import Connector from bookwyrm.connectors.openlibrary import ignore_edition from bookwyrm.connectors.openlibrary import get_languages, get_description from bookwyrm.connectors.openlibrary import pick_default_edition, get_openlibrary_key -from bookwyrm.connectors.abstract_connector import SearchResult from bookwyrm.connectors.connector_manager import ConnectorException diff --git a/bookwyrm/tests/connectors/test_self_connector.py b/bookwyrm/tests/connectors/test_self_connector.py deleted file mode 100644 index 86aa7add..00000000 --- a/bookwyrm/tests/connectors/test_self_connector.py +++ /dev/null @@ -1,107 +0,0 @@ -""" testing book data connectors """ -import datetime -from django.test import TestCase -from django.utils import timezone - -from bookwyrm import models -from bookwyrm.connectors.self_connector import Connector -from bookwyrm.settings import DOMAIN - - -class SelfConnector(TestCase): - """just uses local data""" - - def setUp(self): - """creating the connector""" - models.Connector.objects.create( - identifier=DOMAIN, - name="Local", - local=True, - connector_file="self_connector", - base_url="https://%s" % DOMAIN, - books_url="https://%s/book" % DOMAIN, - covers_url="https://%s/images/covers" % DOMAIN, - search_url="https://%s/search?q=" % DOMAIN, - priority=1, - ) - self.connector = Connector(DOMAIN) - - def test_format_search_result(self): - """create a SearchResult""" - author = models.Author.objects.create(name="Anonymous") - edition = models.Edition.objects.create( - title="Edition of Example Work", - published_date=datetime.datetime(1980, 5, 10, tzinfo=timezone.utc), - ) - edition.authors.add(author) - result = self.connector.search("Edition of Example")[0] - self.assertEqual(result.title, "Edition of Example Work") - self.assertEqual(result.key, edition.remote_id) - self.assertEqual(result.author, "Anonymous") - self.assertEqual(result.year, 1980) - self.assertEqual(result.connector, self.connector) - - def test_search_rank(self): - """prioritize certain results""" - author = models.Author.objects.create(name="Anonymous") - edition = models.Edition.objects.create( - title="Edition of Example Work", - published_date=datetime.datetime(1980, 5, 10, tzinfo=timezone.utc), - parent_work=models.Work.objects.create(title=""), - ) - # author text is rank B - edition.authors.add(author) - - # series is rank D - models.Edition.objects.create( - title="Another Edition", - series="Anonymous", - parent_work=models.Work.objects.create(title=""), - ) - # subtitle is rank B - models.Edition.objects.create( - title="More Editions", - subtitle="The Anonymous Edition", - parent_work=models.Work.objects.create(title=""), - ) - # title is rank A - models.Edition.objects.create(title="Anonymous") - # doesn't rank in this search - models.Edition.objects.create( - title="An Edition", parent_work=models.Work.objects.create(title="") - ) - - results = self.connector.search("Anonymous") - self.assertEqual(len(results), 4) - self.assertEqual(results[0].title, "Anonymous") - self.assertEqual(results[1].title, "More Editions") - self.assertEqual(results[2].title, "Edition of Example Work") - self.assertEqual(results[3].title, "Another Edition") - - def test_search_multiple_editions(self): - """it should get rid of duplicate editions for the same work""" - work = models.Work.objects.create(title="Work Title") - edition_1 = models.Edition.objects.create( - title="Edition 1 Title", parent_work=work - ) - edition_2 = models.Edition.objects.create( - title="Edition 2 Title", - parent_work=work, - isbn_13="123456789", # this is now the defualt edition - ) - edition_3 = models.Edition.objects.create(title="Fish", parent_work=work) - - # pick the best edition - results = self.connector.search("Edition 1 Title") - self.assertEqual(len(results), 1) - self.assertEqual(results[0].key, edition_1.remote_id) - - # pick the default edition when no match is best - results = self.connector.search("Edition Title") - self.assertEqual(len(results), 1) - self.assertEqual(results[0].key, edition_2.remote_id) - - # only matches one edition, so no deduplication takes place - results = self.connector.search("Fish") - self.assertEqual(len(results), 1) - self.assertEqual(results[0].key, edition_3.remote_id) diff --git a/bookwyrm/tests/importers/test_goodreads_import.py b/bookwyrm/tests/importers/test_goodreads_import.py index 387d9f4f..d2b0ea7d 100644 --- a/bookwyrm/tests/importers/test_goodreads_import.py +++ b/bookwyrm/tests/importers/test_goodreads_import.py @@ -12,7 +12,6 @@ import responses from bookwyrm import models from bookwyrm.importers import GoodreadsImporter from bookwyrm.importers.importer import import_data, handle_imported_book -from bookwyrm.settings import DOMAIN def make_date(*args): @@ -39,17 +38,6 @@ class GoodreadsImport(TestCase): "mouse", "mouse@mouse.mouse", "password", local=True ) - models.Connector.objects.create( - identifier=DOMAIN, - name="Local", - local=True, - connector_file="self_connector", - base_url="https://%s" % DOMAIN, - books_url="https://%s/book" % DOMAIN, - covers_url="https://%s/images/covers" % DOMAIN, - search_url="https://%s/search?q=" % DOMAIN, - priority=1, - ) work = models.Work.objects.create(title="Test Work") self.book = models.Edition.objects.create( title="Example Edition", @@ -125,7 +113,7 @@ class GoodreadsImport(TestCase): import_job = models.ImportJob.objects.create(user=self.user) datafile = pathlib.Path(__file__).parent.joinpath("../data/goodreads.csv") - csv_file = open(datafile, "r") + csv_file = open(datafile, "r") # pylint: disable=unspecified-encoding for index, entry in enumerate(list(csv.DictReader(csv_file))): entry = self.importer.parse_fields(entry) import_item = models.ImportItem.objects.create( @@ -162,7 +150,7 @@ class GoodreadsImport(TestCase): import_job = models.ImportJob.objects.create(user=self.user) datafile = pathlib.Path(__file__).parent.joinpath("../data/goodreads.csv") - csv_file = open(datafile, "r") + csv_file = open(datafile, "r") # pylint: disable=unspecified-encoding for index, entry in enumerate(list(csv.DictReader(csv_file))): entry = self.importer.parse_fields(entry) import_item = models.ImportItem.objects.create( @@ -192,7 +180,7 @@ class GoodreadsImport(TestCase): shelf = self.user.shelf_set.filter(identifier="read").first() import_job = models.ImportJob.objects.create(user=self.user) datafile = pathlib.Path(__file__).parent.joinpath("../data/goodreads.csv") - csv_file = open(datafile, "r") + csv_file = open(datafile, "r") # pylint: disable=unspecified-encoding for index, entry in enumerate(list(csv.DictReader(csv_file))): entry = self.importer.parse_fields(entry) import_item = models.ImportItem.objects.create( @@ -224,7 +212,7 @@ class GoodreadsImport(TestCase): """goodreads review import""" import_job = models.ImportJob.objects.create(user=self.user) datafile = pathlib.Path(__file__).parent.joinpath("../data/goodreads.csv") - csv_file = open(datafile, "r") + csv_file = open(datafile, "r") # pylint: disable=unspecified-encoding entry = list(csv.DictReader(csv_file))[2] entry = self.importer.parse_fields(entry) import_item = models.ImportItem.objects.create( @@ -248,7 +236,7 @@ class GoodreadsImport(TestCase): datafile = pathlib.Path(__file__).parent.joinpath( "../data/goodreads-rating.csv" ) - csv_file = open(datafile, "r") + csv_file = open(datafile, "r") # pylint: disable=unspecified-encoding entry = list(csv.DictReader(csv_file))[2] entry = self.importer.parse_fields(entry) import_item = models.ImportItem.objects.create( @@ -269,7 +257,7 @@ class GoodreadsImport(TestCase): """goodreads review import""" import_job = models.ImportJob.objects.create(user=self.user) datafile = pathlib.Path(__file__).parent.joinpath("../data/goodreads.csv") - csv_file = open(datafile, "r") + csv_file = open(datafile, "r") # pylint: disable=unspecified-encoding entry = list(csv.DictReader(csv_file))[2] entry = self.importer.parse_fields(entry) import_item = models.ImportItem.objects.create( diff --git a/bookwyrm/tests/importers/test_librarything_import.py b/bookwyrm/tests/importers/test_librarything_import.py index dfdd515e..ab92c11b 100644 --- a/bookwyrm/tests/importers/test_librarything_import.py +++ b/bookwyrm/tests/importers/test_librarything_import.py @@ -11,7 +11,6 @@ import responses from bookwyrm import models from bookwyrm.importers import LibrarythingImporter from bookwyrm.importers.importer import import_data, handle_imported_book -from bookwyrm.settings import DOMAIN def make_date(*args): @@ -39,18 +38,6 @@ class LibrarythingImport(TestCase): self.user = models.User.objects.create_user( "mmai", "mmai@mmai.mmai", "password", local=True ) - - models.Connector.objects.create( - identifier=DOMAIN, - name="Local", - local=True, - connector_file="self_connector", - base_url="https://%s" % DOMAIN, - books_url="https://%s/book" % DOMAIN, - covers_url="https://%s/images/covers" % DOMAIN, - search_url="https://%s/search?q=" % DOMAIN, - priority=1, - ) work = models.Work.objects.create(title="Test Work") self.book = models.Edition.objects.create( title="Example Edition", diff --git a/bookwyrm/tests/models/test_import_model.py b/bookwyrm/tests/models/test_import_model.py index caf61034..0e5d6760 100644 --- a/bookwyrm/tests/models/test_import_model.py +++ b/bookwyrm/tests/models/test_import_model.py @@ -9,8 +9,8 @@ from django.test import TestCase import responses from bookwyrm import models +from bookwyrm.book_search import SearchResult from bookwyrm.connectors import connector_manager -from bookwyrm.connectors.abstract_connector import SearchResult class ImportJob(TestCase): diff --git a/bookwyrm/tests/test_book_search.py b/bookwyrm/tests/test_book_search.py new file mode 100644 index 00000000..4b9a0681 --- /dev/null +++ b/bookwyrm/tests/test_book_search.py @@ -0,0 +1,118 @@ +""" test searching for books """ +import datetime +from django.test import TestCase +from django.utils import timezone + +from bookwyrm import book_search, models +from bookwyrm.connectors.abstract_connector import AbstractMinimalConnector + + +class BookSearch(TestCase): + """look for some books""" + + def setUp(self): + """we need basic test data and mocks""" + self.work = models.Work.objects.create(title="Example Work") + + self.first_edition = models.Edition.objects.create( + title="Example Edition", + parent_work=self.work, + isbn_10="0000000000", + physical_format="Paperback", + published_date=datetime.datetime(2019, 4, 9, 0, 0, tzinfo=timezone.utc), + ) + self.second_edition = models.Edition.objects.create( + title="Another Edition", + parent_work=self.work, + isbn_10="1111111111", + openlibrary_key="hello", + ) + + def test_search(self): + """search for a book in the db""" + # title/author + results = book_search.search("Example") + self.assertEqual(len(results), 1) + self.assertEqual(results[0], self.first_edition) + + # isbn + results = book_search.search("0000000000") + self.assertEqual(len(results), 1) + self.assertEqual(results[0], self.first_edition) + + # identifier + results = book_search.search("hello") + self.assertEqual(len(results), 1) + self.assertEqual(results[0], self.second_edition) + + def test_isbn_search(self): + """test isbn search""" + results = book_search.isbn_search("0000000000") + self.assertEqual(len(results), 1) + self.assertEqual(results[0], self.first_edition) + + def test_search_identifiers(self): + """search by unique identifiers""" + results = book_search.search_identifiers("hello") + self.assertEqual(len(results), 1) + self.assertEqual(results[0], self.second_edition) + + def test_search_title_author(self): + """search by unique identifiers""" + results = book_search.search_title_author("Another", min_confidence=0) + self.assertEqual(len(results), 1) + self.assertEqual(results[0], self.second_edition) + + def test_format_search_result(self): + """format a search result""" + result = book_search.format_search_result(self.first_edition) + self.assertEqual(result["title"], "Example Edition") + self.assertEqual(result["key"], self.first_edition.remote_id) + self.assertEqual(result["year"], 2019) + + result = book_search.format_search_result(self.second_edition) + self.assertEqual(result["title"], "Another Edition") + self.assertEqual(result["key"], self.second_edition.remote_id) + self.assertIsNone(result["year"]) + + def test_search_result(self): + """a class that stores info about a search result""" + models.Connector.objects.create( + identifier="example.com", + connector_file="openlibrary", + base_url="https://example.com", + books_url="https://example.com/books", + covers_url="https://example.com/covers", + search_url="https://example.com/search?q=", + isbn_search_url="https://example.com/isbn?q=", + ) + + class TestConnector(AbstractMinimalConnector): + """nothing added here""" + + def format_search_result(self, search_result): + return search_result + + def get_or_create_book(self, remote_id): + pass + + def parse_search_data(self, data): + return data + + def format_isbn_search_result(self, search_result): + return search_result + + def parse_isbn_search_data(self, data): + return data + + test_connector = TestConnector("example.com") + result = book_search.SearchResult( + title="Title", + key="https://example.com/book/1", + author="Author Name", + year="1850", + connector=test_connector, + ) + # there's really not much to test here, it's just a dataclass + self.assertEqual(result.confidence, 1) + self.assertEqual(result.title, "Title") diff --git a/bookwyrm/tests/views/test_get_started.py b/bookwyrm/tests/views/test_get_started.py index 135896dc..ff441b57 100644 --- a/bookwyrm/tests/views/test_get_started.py +++ b/bookwyrm/tests/views/test_get_started.py @@ -29,9 +29,6 @@ class GetStartedViews(TestCase): title="Example Edition", remote_id="https://example.com/book/1", ) - models.Connector.objects.create( - identifier="self", connector_file="self_connector", local=True - ) models.SiteSettings.objects.create() def test_profile_view(self, *_): diff --git a/bookwyrm/tests/views/test_isbn.py b/bookwyrm/tests/views/test_isbn.py index a6a45174..bdf72f75 100644 --- a/bookwyrm/tests/views/test_isbn.py +++ b/bookwyrm/tests/views/test_isbn.py @@ -34,9 +34,6 @@ class IsbnViews(TestCase): remote_id="https://example.com/book/1", parent_work=self.work, ) - models.Connector.objects.create( - identifier="self", connector_file="self_connector", local=True - ) models.SiteSettings.objects.create() def test_isbn_json_response(self): @@ -51,4 +48,4 @@ class IsbnViews(TestCase): data = json.loads(response.content) self.assertEqual(len(data), 1) self.assertEqual(data[0]["title"], "Test Book") - self.assertEqual(data[0]["key"], "https://%s/book/%d" % (DOMAIN, self.book.id)) + self.assertEqual(data[0]["key"], f"https://{DOMAIN}/book/{self.book.id}") diff --git a/bookwyrm/tests/views/test_search.py b/bookwyrm/tests/views/test_search.py index dacbcbde..da35f557 100644 --- a/bookwyrm/tests/views/test_search.py +++ b/bookwyrm/tests/views/test_search.py @@ -1,5 +1,6 @@ """ test for app action functionality """ import json +import pathlib from unittest.mock import patch from django.contrib.auth.models import AnonymousUser @@ -7,9 +8,9 @@ from django.http import JsonResponse from django.template.response import TemplateResponse from django.test import TestCase from django.test.client import RequestFactory +import responses from bookwyrm import models, views -from bookwyrm.connectors import abstract_connector from bookwyrm.settings import DOMAIN @@ -36,15 +37,11 @@ class Views(TestCase): remote_id="https://example.com/book/1", parent_work=self.work, ) - models.Connector.objects.create( - identifier="self", connector_file="self_connector", local=True - ) models.SiteSettings.objects.create() def test_search_json_response(self): """searches local data only and returns book data in json format""" view = views.Search.as_view() - # we need a connector for this, sorry request = self.factory.get("", {"q": "Test Book"}) with patch("bookwyrm.views.search.is_api_request") as is_api: is_api.return_value = True @@ -67,28 +64,11 @@ class Views(TestCase): self.assertIsInstance(response, TemplateResponse) response.render() + @responses.activate def test_search_books(self): """searches remote connectors""" view = views.Search.as_view() - class TestConnector(abstract_connector.AbstractMinimalConnector): - """nothing added here""" - - def format_search_result(self, search_result): - pass - - def get_or_create_book(self, remote_id): - pass - - def parse_search_data(self, data): - pass - - def format_isbn_search_result(self, search_result): - return search_result - - def parse_isbn_search_data(self, data): - return data - models.Connector.objects.create( identifier="example.com", connector_file="openlibrary", @@ -97,26 +77,25 @@ class Views(TestCase): covers_url="https://example.com/covers", search_url="https://example.com/search?q=", ) - connector = TestConnector("example.com") - - search_result = abstract_connector.SearchResult( - key="http://www.example.com/book/1", - title="Gideon the Ninth", - author="Tamsyn Muir", - year="2019", - connector=connector, + datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json") + search_data = json.loads(datafile.read_bytes()) + responses.add( + responses.GET, "https://example.com/search?q=Test%20Book", json=search_data ) request = self.factory.get("", {"q": "Test Book", "remote": True}) request.user = self.local_user with patch("bookwyrm.views.search.is_api_request") as is_api: is_api.return_value = False - with patch("bookwyrm.connectors.connector_manager.search") as manager: - manager.return_value = [search_result] - response = view(request) + response = view(request) self.assertIsInstance(response, TemplateResponse) response.render() - self.assertEqual(response.context_data["results"][0].title, "Gideon the Ninth") + connector_results = response.context_data["results"] + self.assertEqual(connector_results[0]["results"][0].title, "Test Book") + self.assertEqual( + connector_results[1]["results"][0].title, + "This Is How You Lose the Time War", + ) def test_search_users(self): """searches remote connectors""" diff --git a/bookwyrm/urls.py b/bookwyrm/urls.py index 360f147f..c81d9790 100644 --- a/bookwyrm/urls.py +++ b/bookwyrm/urls.py @@ -233,6 +233,7 @@ urlpatterns = [ name="direct-messages-user", ), # search + re_path(r"^search.json/?$", views.Search.as_view(), name="search"), re_path(r"^search/?$", views.Search.as_view(), name="search"), # imports re_path(r"^import/?$", views.Import.as_view(), name="import"), diff --git a/bookwyrm/views/get_started.py b/bookwyrm/views/get_started.py index 68eedeff..709b3754 100644 --- a/bookwyrm/views/get_started.py +++ b/bookwyrm/views/get_started.py @@ -10,8 +10,7 @@ from django.template.response import TemplateResponse from django.utils.decorators import method_decorator from django.views import View -from bookwyrm import forms, models -from bookwyrm.connectors import connector_manager +from bookwyrm import book_search, forms, models from bookwyrm.suggested_users import suggested_users from .preferences.edit_user import save_user_form @@ -54,7 +53,7 @@ class GetStartedBooks(View): query = request.GET.get("query") book_results = popular_books = [] if query: - book_results = connector_manager.local_search(query, raw=True)[:5] + book_results = book_search.search(query)[:5] if len(book_results) < 5: popular_books = ( models.Edition.objects.exclude( diff --git a/bookwyrm/views/helpers.py b/bookwyrm/views/helpers.py index c1fe372c..fc5493f5 100644 --- a/bookwyrm/views/helpers.py +++ b/bookwyrm/views/helpers.py @@ -32,7 +32,9 @@ def get_user_from_username(viewer, username): def is_api_request(request): """check whether a request is asking for html or data""" - return "json" in request.headers.get("Accept", "") or request.path[-5:] == ".json" + return "json" in request.headers.get("Accept", "") or re.match( + r".*\.json/?$", request.path + ) def is_bookwyrm_request(request): diff --git a/bookwyrm/views/isbn.py b/bookwyrm/views/isbn.py index 3055a354..e5343488 100644 --- a/bookwyrm/views/isbn.py +++ b/bookwyrm/views/isbn.py @@ -4,7 +4,7 @@ from django.http import JsonResponse from django.template.response import TemplateResponse from django.views import View -from bookwyrm.connectors import connector_manager +from bookwyrm import book_search from bookwyrm.settings import PAGE_LENGTH from .helpers import is_api_request @@ -14,10 +14,12 @@ class Isbn(View): def get(self, request, isbn): """info about a book""" - book_results = connector_manager.isbn_local_search(isbn) + book_results = book_search.isbn_search(isbn) if is_api_request(request): - return JsonResponse([r.json() for r in book_results], safe=False) + return JsonResponse( + [book_search.format_search_result(r) for r in book_results], safe=False + ) paginated = Paginator(book_results, PAGE_LENGTH).get_page( request.GET.get("page") diff --git a/bookwyrm/views/list.py b/bookwyrm/views/list.py index 904e5f1c..a9d77597 100644 --- a/bookwyrm/views/list.py +++ b/bookwyrm/views/list.py @@ -15,9 +15,8 @@ from django.utils.decorators import method_decorator from django.views import View from django.views.decorators.http import require_POST -from bookwyrm import forms, models +from bookwyrm import book_search, forms, models from bookwyrm.activitypub import ActivitypubResponse -from bookwyrm.connectors import connector_manager from bookwyrm.settings import PAGE_LENGTH from .helpers import is_api_request, privacy_filter from .helpers import get_user_from_username @@ -150,9 +149,8 @@ class List(View): if query and request.user.is_authenticated: # search for books - suggestions = connector_manager.local_search( + suggestions = book_search.search( query, - raw=True, filters=[~Q(parent_work__editions__in=book_list.books.all())], ) elif request.user.is_authenticated: diff --git a/bookwyrm/views/search.py b/bookwyrm/views/search.py index d8abdce5..df891266 100644 --- a/bookwyrm/views/search.py +++ b/bookwyrm/views/search.py @@ -10,6 +10,7 @@ from django.views import View from bookwyrm import models from bookwyrm.connectors import connector_manager +from bookwyrm.book_search import search, format_search_result from bookwyrm.settings import PAGE_LENGTH from bookwyrm.utils import regex from .helpers import is_api_request, privacy_filter @@ -31,10 +32,10 @@ class Search(View): if is_api_request(request): # only return local book results via json so we don't cascade - book_results = connector_manager.local_search( - query, min_confidence=min_confidence + book_results = search(query, min_confidence=min_confidence) + return JsonResponse( + [format_search_result(r) for r in book_results], safe=False ) - return JsonResponse([r.json() for r in book_results], safe=False) if query and not search_type: search_type = "user" if "@" in query else "book" @@ -69,13 +70,13 @@ class Search(View): def book_search(query, _, min_confidence, search_remote=False): """the real business is elsewhere""" # try a local-only search - if not search_remote: - results = connector_manager.local_search(query, min_confidence=min_confidence) - if results: - # gret, we found something - return [{"results": results}], False - # if there weere no local results, or the request was for remote, search all sources - return connector_manager.search(query, min_confidence=min_confidence), True + results = [{"results": search(query, min_confidence=min_confidence)}] + if results and not search_remote: + return results, False + + # if there were no local results, or the request was for remote, search all sources + results += connector_manager.search(query, min_confidence=min_confidence) + return results, True def user_search(query, viewer, *_):