Merge branch 'main' into progress_update

This commit is contained in:
Joel Bradshaw
2021-01-13 19:45:36 -08:00
179 changed files with 9338 additions and 3228 deletions

View File

@ -1,98 +1,114 @@
''' testing book data connectors '''
from unittest.mock import patch
from django.test import TestCase
import responses
from bookwyrm import models
from bookwyrm.connectors import abstract_connector
from bookwyrm.connectors.abstract_connector import Mapping
from bookwyrm.connectors.bookwyrm_connector import Connector
from bookwyrm.settings import DOMAIN
class AbstractConnector(TestCase):
''' generic code for connecting to outside data sources '''
def setUp(self):
self.book = models.Edition.objects.create(title='Example Edition')
models.Connector.objects.create(
''' we need an example connector '''
self.connector_info = models.Connector.objects.create(
identifier='example.com',
connector_file='bookwyrm_connector',
connector_file='openlibrary',
base_url='https://example.com',
books_url='https:/example.com',
covers_url='https://example.com',
books_url='https://example.com/books',
covers_url='https://example.com/covers',
search_url='https://example.com/search?q=',
)
self.connector = Connector('example.com')
self.data = {
'title': 'Unused title',
'ASIN': 'A00BLAH',
'isbn_10': '1234567890',
'isbn_13': 'blahhh',
'blah': 'bip',
'format': 'hardcover',
'series': ['one', 'two'],
work_data = {
'id': 'abc1',
'title': 'Test work',
'type': 'work',
'openlibraryKey': 'OL1234W',
}
self.connector.key_mappings = [
Mapping('isbn_10', model=models.Edition),
Mapping('isbn_13'),
Mapping('lccn', model=models.Work),
Mapping('asin', remote_field='ASIN'),
self.work_data = work_data
edition_data = {
'id': 'abc2',
'title': 'Test edition',
'type': 'edition',
'openlibraryKey': 'OL1234M',
}
self.edition_data = edition_data
class TestConnector(abstract_connector.AbstractConnector):
''' nothing added here '''
def format_search_result(self, search_result):
return search_result
def parse_search_data(self, data):
return data
def is_work_data(self, data):
return data['type'] == 'work'
def get_edition_from_work_data(self, data):
return edition_data
def get_work_from_edition_data(self, data):
return work_data
def get_authors_from_data(self, data):
return []
def expand_book_data(self, book):
pass
self.connector = TestConnector('example.com')
self.connector.book_mappings = [
Mapping('id'),
Mapping('title'),
Mapping('openlibraryKey'),
]
def test_create_mapping(self):
mapping = Mapping('isbn')
self.assertEqual(mapping.local_field, 'isbn')
self.assertEqual(mapping.remote_field, 'isbn')
self.assertEqual(mapping.model, None)
self.assertEqual(mapping.formatter('bb'), 'bb')
self.book = models.Edition.objects.create(
title='Test Book', remote_id='https://example.com/book/1234',
openlibrary_key='OL1234M')
def test_create_mapping_with_remote(self):
mapping = Mapping('isbn', remote_field='isbn13')
self.assertEqual(mapping.local_field, 'isbn')
self.assertEqual(mapping.remote_field, 'isbn13')
self.assertEqual(mapping.model, None)
self.assertEqual(mapping.formatter('bb'), 'bb')
def test_abstract_connector_init(self):
''' barebones connector for search with defaults '''
self.assertIsInstance(self.connector.book_mappings, list)
def test_create_mapping_with_formatter(self):
formatter = lambda x: 'aa' + x
mapping = Mapping('isbn', formatter=formatter)
self.assertEqual(mapping.local_field, 'isbn')
self.assertEqual(mapping.remote_field, 'isbn')
self.assertEqual(mapping.formatter, formatter)
self.assertEqual(mapping.model, None)
self.assertEqual(mapping.formatter('bb'), 'aabb')
def test_is_available(self):
''' this isn't used.... '''
self.assertTrue(self.connector.is_available())
self.connector.max_query_count = 1
self.connector.connector.query_count = 2
self.assertFalse(self.connector.is_available())
def test_match_from_mappings(self):
edition = models.Edition.objects.create(
title='Blah',
isbn_13='blahhh',
def test_get_or_create_book_existing(self):
''' find an existing book by remote/origin id '''
self.assertEqual(models.Book.objects.count(), 1)
self.assertEqual(
self.book.remote_id, 'https://%s/book/%d' % (DOMAIN, self.book.id))
self.assertEqual(
self.book.origin_id, 'https://example.com/book/1234')
# dedupe by origin id
result = self.connector.get_or_create_book(
'https://example.com/book/1234')
self.assertEqual(models.Book.objects.count(), 1)
self.assertEqual(result, self.book)
# dedupe by remote id
result = self.connector.get_or_create_book(
'https://%s/book/%d' % (DOMAIN, self.book.id))
self.assertEqual(models.Book.objects.count(), 1)
self.assertEqual(result, self.book)
@responses.activate
def test_get_or_create_book_deduped(self):
''' load remote data and deduplicate '''
responses.add(
responses.GET,
'https://example.com/book/abcd',
json=self.edition_data
)
match = self.connector.match_from_mappings(self.data, models.Edition)
self.assertEqual(match, edition)
def test_match_from_mappings_with_model(self):
edition = models.Edition.objects.create(
title='Blah',
isbn_10='1234567890',
)
match = self.connector.match_from_mappings(self.data, models.Edition)
self.assertEqual(match, edition)
def test_match_from_mappings_with_remote(self):
edition = models.Edition.objects.create(
title='Blah',
asin='A00BLAH',
)
match = self.connector.match_from_mappings(self.data, models.Edition)
self.assertEqual(match, edition)
def test_match_from_mappings_no_match(self):
edition = models.Edition.objects.create(
title='Blah',
)
match = self.connector.match_from_mappings(self.data, models.Edition)
self.assertEqual(match, None)
with patch(
'bookwyrm.connectors.abstract_connector.load_more_data.delay'):
result = self.connector.get_or_create_book(
'https://example.com/book/abcd')
self.assertEqual(result, self.book)
self.assertEqual(models.Edition.objects.count(), 1)
self.assertEqual(models.Edition.objects.count(), 1)

View File

@ -0,0 +1,100 @@
''' testing book data connectors '''
from django.test import TestCase
import responses
from bookwyrm import models
from bookwyrm.connectors import abstract_connector
from bookwyrm.connectors.abstract_connector import Mapping, SearchResult
class AbstractConnector(TestCase):
    ''' checks the minimal connector, search results and field mappings '''

    def setUp(self):
        ''' store a connector entry and build a stub connector for it '''
        connector_fields = {
            'identifier': 'example.com',
            'connector_file': 'openlibrary',
            'base_url': 'https://example.com',
            'books_url': 'https://example.com/books',
            'covers_url': 'https://example.com/covers',
            'search_url': 'https://example.com/search?q=',
        }
        self.connector_info = models.Connector.objects.create(
            **connector_fields)

        class TestConnector(abstract_connector.AbstractMinimalConnector):
            ''' minimal subclass whose overrides do no real work '''

            def format_search_result(self, search_result):
                ''' give the search result back unchanged '''
                return search_result

            def get_or_create_book(self, remote_id):
                ''' intentionally does nothing '''
                return None

            def parse_search_data(self, data):
                ''' give the response data back unchanged '''
                return data

        self.test_connector = TestConnector('example.com')

    def test_abstract_minimal_connector_init(self):
        ''' attributes match the connector entry created in setUp '''
        connector = self.test_connector
        expected_values = (
            ('connector', self.connector_info),
            ('base_url', 'https://example.com'),
            ('books_url', 'https://example.com/books'),
            ('covers_url', 'https://example.com/covers'),
            ('search_url', 'https://example.com/search?q='),
        )
        for attribute, expected in expected_values:
            self.assertEqual(getattr(connector, attribute), expected)
        self.assertIsNone(connector.name)
        self.assertEqual(connector.identifier, 'example.com')
        self.assertIsNone(connector.max_query_count)
        self.assertFalse(connector.local)

    @responses.activate
    def test_search(self):
        ''' twelve mocked results go in, ten come back in order '''
        mocked_results = [
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l']
        responses.add(
            responses.GET,
            'https://example.com/search?q=a%20book%20title',
            json=mocked_results,
            status=200)

        results = self.test_connector.search('a book title')

        self.assertEqual(len(results), 10)
        for position, letter in enumerate(('a', 'b', 'c')):
            self.assertEqual(results[position], letter)

    def test_search_result(self):
        ''' a SearchResult exposes its title and a confidence of 1 '''
        search_result = SearchResult(
            connector=self.test_connector,
            year='1850',
            author='Author Name',
            key='https://example.com/book/1',
            title='Title',
        )
        # only the basics are checked, the class just holds data
        self.assertEqual(search_result.confidence, 1)
        self.assertEqual(search_result.title, 'Title')

    def test_create_mapping(self):
        ''' with one field name, local and remote fields are the same '''
        field_mapping = Mapping('isbn')
        self.assertEqual(field_mapping.local_field, 'isbn')
        self.assertEqual(field_mapping.remote_field, 'isbn')
        formatted = field_mapping.formatter('bb')
        self.assertEqual(formatted, 'bb')

    def test_create_mapping_with_remote(self):
        ''' an explicit remote field is kept apart from the local one '''
        field_mapping = Mapping('isbn', remote_field='isbn13')
        self.assertEqual(field_mapping.local_field, 'isbn')
        self.assertEqual(field_mapping.remote_field, 'isbn13')
        formatted = field_mapping.formatter('bb')
        self.assertEqual(formatted, 'bb')

    def test_create_mapping_with_formatter(self):
        ''' a supplied formatter is stored and applied to values '''
        def formatter(value):
            return 'aa' + value

        field_mapping = Mapping('isbn', formatter=formatter)
        self.assertEqual(field_mapping.local_field, 'isbn')
        self.assertEqual(field_mapping.remote_field, 'isbn')
        self.assertEqual(field_mapping.formatter, formatter)
        self.assertEqual(field_mapping.formatter('bb'), 'aabb')

View File

@ -1,16 +1,17 @@
''' testing book data connectors '''
from dateutil import parser
from django.test import TestCase
import json
import pathlib
from django.test import TestCase
from bookwyrm import models
from bookwyrm.connectors.bookwyrm_connector import Connector
from bookwyrm.connectors.abstract_connector import SearchResult, get_date
from bookwyrm.connectors.abstract_connector import SearchResult
class BookWyrmConnector(TestCase):
''' this connector doesn't do much, just search '''
def setUp(self):
''' create the connector '''
models.Connector.objects.create(
identifier='example.com',
connector_file='bookwyrm_connector',
@ -29,13 +30,10 @@ class BookWyrmConnector(TestCase):
self.edition_data = json.loads(edition_file.read_bytes())
def test_is_work_data(self):
self.assertEqual(self.connector.is_work_data(self.work_data), True)
self.assertEqual(self.connector.is_work_data(self.edition_data), False)
def test_format_search_result(self):
datafile = pathlib.Path(__file__).parent.joinpath('../data/bw_search.json')
''' create a SearchResult object from search response json '''
datafile = pathlib.Path(__file__).parent.joinpath(
'../data/bw_search.json')
search_data = json.loads(datafile.read_bytes())
results = self.connector.parse_search_data(search_data)
self.assertIsInstance(results, list)
@ -46,9 +44,4 @@ class BookWyrmConnector(TestCase):
self.assertEqual(result.key, 'https://example.com/book/122')
self.assertEqual(result.author, 'Susanna Clarke')
self.assertEqual(result.year, 2017)
def test_get_date(self):
date = get_date(self.edition_data['published_date'])
expected = parser.parse("2020-09-15T00:00:00+00:00")
self.assertEqual(date, expected)
self.assertEqual(result.connector, self.connector)

View File

@ -0,0 +1,83 @@
''' interface between the app and various connectors '''
from django.test import TestCase
from bookwyrm import models
from bookwyrm.connectors import connector_manager
from bookwyrm.connectors.bookwyrm_connector \
import Connector as BookWyrmConnector
from bookwyrm.connectors.self_connector \
import Connector as SelfConnector
class ConnectorManager(TestCase):
    ''' tests for the connector_manager helper functions '''

    def setUp(self):
        ''' create a work with a default edition and a local connector '''
        work = models.Work.objects.create(title='Example Work')
        edition = models.Edition.objects.create(
            title='Example Edition', parent_work=work)
        work.default_edition = edition
        work.save()
        self.work = work
        self.edition = edition

        connector_fields = {
            'identifier': 'test_connector',
            'priority': 1,
            'local': True,
            'connector_file': 'self_connector',
            'base_url': 'http://test.com/',
            'books_url': 'http://test.com/',
            'covers_url': 'http://test.com/',
        }
        self.connector = models.Connector.objects.create(**connector_fields)

    def test_get_or_create_connector(self):
        ''' the same remote id gives the same connector identifier twice '''
        remote_id = 'https://example.com/object/1'

        first = connector_manager.get_or_create_connector(remote_id)
        self.assertIsInstance(first, BookWyrmConnector)
        self.assertEqual(first.identifier, 'example.com')
        self.assertEqual(first.base_url, 'https://example.com')

        second = connector_manager.get_or_create_connector(remote_id)
        self.assertEqual(first.identifier, second.identifier)

    def test_get_connectors(self):
        ''' the local connector is listed before the remote one '''
        connector_manager.get_or_create_connector(
            'https://example.com/object/1')

        connectors = list(connector_manager.get_connectors())

        self.assertEqual(len(connectors), 2)
        local_connector, remote_connector = connectors
        self.assertIsInstance(local_connector, SelfConnector)
        self.assertIsInstance(remote_connector, BookWyrmConnector)

    def test_search(self):
        ''' searching returns one result set holding the example edition '''
        result_sets = connector_manager.search('Example')
        self.assertEqual(len(result_sets), 1)

        result_set = result_sets[0]
        self.assertIsInstance(result_set['connector'], SelfConnector)
        self.assertEqual(len(result_set['results']), 1)
        self.assertEqual(result_set['results'][0].title, 'Example Edition')

    def test_local_search(self):
        ''' local_search finds the example edition '''
        matches = connector_manager.local_search('Example')
        self.assertEqual(len(matches), 1)
        self.assertEqual(matches[0].title, 'Example Edition')

    def test_first_search_result(self):
        ''' a single result for a match, None when nothing matches '''
        found = connector_manager.first_search_result('Example')
        self.assertEqual(found.title, 'Example Edition')

        missing = connector_manager.first_search_result('dkjfhg')
        self.assertIsNone(missing)

    def test_load_connector(self):
        ''' a connector object is built from the stored connector entry '''
        loaded = connector_manager.load_connector(self.connector)
        self.assertIsInstance(loaded, SelfConnector)
        self.assertEqual(loaded.identifier, 'test_connector')

View File

@ -1,19 +1,24 @@
''' testing book data connectors '''
from dateutil import parser
from django.test import TestCase
import json
import pathlib
import pytz
from unittest.mock import patch
from django.test import TestCase
import responses
from bookwyrm import models
from bookwyrm.connectors.openlibrary import Connector
from bookwyrm.connectors.openlibrary import get_languages, get_description
from bookwyrm.connectors.openlibrary import pick_default_edition, get_openlibrary_key
from bookwyrm.connectors.abstract_connector import SearchResult, get_date
from bookwyrm.connectors.openlibrary import pick_default_edition, \
get_openlibrary_key
from bookwyrm.connectors.abstract_connector import SearchResult
from bookwyrm.connectors.connector_manager import ConnectorException
class Openlibrary(TestCase):
''' test loading data from openlibrary.org '''
def setUp(self):
''' creates the connector we'll use '''
models.Connector.objects.create(
identifier='openlibrary.org',
name='OpenLibrary',
@ -36,19 +41,85 @@ class Openlibrary(TestCase):
self.edition_list_data = json.loads(edition_list_file.read_bytes())
def test_get_remote_id_from_data(self):
''' format the remote id from the data '''
data = {'key': '/work/OL1234W'}
result = self.connector.get_remote_id_from_data(data)
self.assertEqual(result, 'https://openlibrary.org/work/OL1234W')
# error handling
with self.assertRaises(ConnectorException):
self.connector.get_remote_id_from_data({})
def test_is_work_data(self):
''' detect if the loaded json is a work '''
self.assertEqual(self.connector.is_work_data(self.work_data), True)
self.assertEqual(self.connector.is_work_data(self.edition_data), False)
def test_pick_default_edition(self):
edition = pick_default_edition(self.edition_list_data['entries'])
self.assertEqual(edition['key'], '/books/OL9952943M')
@responses.activate
def test_get_edition_from_work_data(self):
''' loads a list of editions '''
data = {'key': '/work/OL1234W'}
responses.add(
responses.GET,
'https://openlibrary.org/work/OL1234W/editions',
json={'entries': []},
status=200)
with patch('bookwyrm.connectors.openlibrary.pick_default_edition') \
as pick_edition:
pick_edition.return_value = 'hi'
result = self.connector.get_edition_from_work_data(data)
self.assertEqual(result, 'hi')
@responses.activate
def test_get_work_from_edition_data(self):
''' loads the work referenced by the edition data '''
data = {'works': [{'key': '/work/OL1234W'}]}
responses.add(
responses.GET,
'https://openlibrary.org/work/OL1234W',
json={'hi': 'there'},
status=200)
result = self.connector.get_work_from_edition_data(data)
self.assertEqual(result, {'hi': 'there'})
@responses.activate
def test_get_authors_from_data(self):
''' find authors in data '''
responses.add(
responses.GET,
'https://openlibrary.org/authors/OL382982A',
json={'hi': 'there'},
status=200)
results = self.connector.get_authors_from_data(self.work_data)
for result in results:
self.assertIsInstance(result, models.Author)
def test_get_cover_url(self):
''' formats a url that should contain the cover image '''
blob = ['image']
result = self.connector.get_cover_url(blob)
self.assertEqual(
result, 'https://covers.openlibrary.org/b/id/image-L.jpg')
def test_parse_search_result(self):
''' extract the results from the search json response '''
datafile = pathlib.Path(__file__).parent.joinpath(
'../data/ol_search.json')
search_data = json.loads(datafile.read_bytes())
result = self.connector.parse_search_data(search_data)
self.assertIsInstance(result, list)
self.assertEqual(len(result), 2)
def test_format_search_result(self):
''' translate json from openlibrary into SearchResult '''
datafile = pathlib.Path(__file__).parent.joinpath('../data/ol_search.json')
datafile = pathlib.Path(__file__).parent.joinpath(
'../data/ol_search.json')
search_data = json.loads(datafile.read_bytes())
results = self.connector.parse_search_data(search_data)
self.assertIsInstance(results, list)
@ -56,29 +127,66 @@ class Openlibrary(TestCase):
result = self.connector.format_search_result(results[0])
self.assertIsInstance(result, SearchResult)
self.assertEqual(result.title, 'This Is How You Lose the Time War')
self.assertEqual(result.key, 'https://openlibrary.org/works/OL20639540W')
self.assertEqual(
result.key, 'https://openlibrary.org/works/OL20639540W')
self.assertEqual(result.author, 'Amal El-Mohtar, Max Gladstone')
self.assertEqual(result.year, 2019)
self.assertEqual(result.connector, self.connector)
@responses.activate
def test_load_edition_data(self):
''' format url from key and make request '''
key = 'OL1234W'
responses.add(
responses.GET,
'https://openlibrary.org/works/OL1234W/editions',
json={'hi': 'there'}
)
result = self.connector.load_edition_data(key)
self.assertEqual(result, {'hi': 'there'})
@responses.activate
def test_expand_book_data(self):
''' given a book, get more editions '''
work = models.Work.objects.create(
title='Test Work', openlibrary_key='OL1234W')
edition = models.Edition.objects.create(
title='Test Edition', parent_work=work)
responses.add(
responses.GET,
'https://openlibrary.org/works/OL1234W/editions',
json={'entries': []},
)
with patch(
'bookwyrm.connectors.abstract_connector.AbstractConnector.' \
'create_edition_from_data'):
self.connector.expand_book_data(edition)
self.connector.expand_book_data(work)
def test_get_description(self):
''' should do some cleanup on the description data '''
description = get_description(self.work_data['description'])
expected = 'First in the Old Kingdom/Abhorsen series.'
self.assertEqual(description, expected)
def test_get_date(self):
date = get_date(self.work_data['first_publish_date'])
expected = pytz.utc.localize(parser.parse('1995'))
self.assertEqual(date, expected)
def test_get_openlibrary_key(self):
''' extracts the uuid '''
key = get_openlibrary_key('/books/OL27320736M')
self.assertEqual(key, 'OL27320736M')
def test_get_languages(self):
''' looks up languages from a list '''
languages = get_languages(self.edition_data['languages'])
self.assertEqual(languages, ['English'])
def test_get_ol_key(self):
key = get_openlibrary_key('/books/OL27320736M')
self.assertEqual(key, 'OL27320736M')
def test_pick_default_edition(self):
''' pick the default edition from a list of editions '''
edition = pick_default_edition(self.edition_list_data['entries'])
self.assertEqual(edition['key'], '/books/OL9788823M')

View File

@ -9,7 +9,9 @@ from bookwyrm.settings import DOMAIN
class SelfConnector(TestCase):
''' just uses local data '''
def setUp(self):
''' creating the connector '''
models.Connector.objects.create(
identifier=DOMAIN,
name='Local',
@ -22,56 +24,85 @@ class SelfConnector(TestCase):
priority=1,
)
self.connector = Connector(DOMAIN)
self.work = models.Work.objects.create(
title='Example Work',
)
self.edition = models.Edition.objects.create(
title='Edition of Example Work',
author_text='Anonymous',
published_date=datetime.datetime(1980, 5, 10, tzinfo=timezone.utc),
parent_work=self.work,
)
models.Edition.objects.create(
title='Another Edition',
parent_work=self.work,
series='Anonymous'
)
models.Edition.objects.create(
title='More Editions',
subtitle='The Anonymous Edition',
parent_work=self.work,
)
models.Edition.objects.create(
title='An Edition',
author_text='Fish',
parent_work=self.work
)
def test_format_search_result(self):
''' create a SearchResult '''
author = models.Author.objects.create(name='Anonymous')
edition = models.Edition.objects.create(
title='Edition of Example Work',
published_date=datetime.datetime(1980, 5, 10, tzinfo=timezone.utc),
)
edition.authors.add(author)
result = self.connector.search('Edition of Example')[0]
self.assertEqual(result.title, 'Edition of Example Work')
self.assertEqual(result.key, self.edition.remote_id)
self.assertEqual(result.key, edition.remote_id)
self.assertEqual(result.author, 'Anonymous')
self.assertEqual(result.year, 1980)
self.assertEqual(result.connector, self.connector)
def test_search_rank(self):
''' prioritize certain results '''
author = models.Author.objects.create(name='Anonymous')
edition = models.Edition.objects.create(
title='Edition of Example Work',
published_date=datetime.datetime(1980, 5, 10, tzinfo=timezone.utc),
parent_work=models.Work.objects.create(title='')
)
# author text is rank C
edition.authors.add(author)
# series is rank D
models.Edition.objects.create(
title='Another Edition',
series='Anonymous',
parent_work=models.Work.objects.create(title='')
)
# subtitle is rank B
models.Edition.objects.create(
title='More Editions',
subtitle='The Anonymous Edition',
parent_work=models.Work.objects.create(title='')
)
# title is rank A
models.Edition.objects.create(title='Anonymous')
# doesn't rank in this search
edition = models.Edition.objects.create(
title='An Edition',
parent_work=models.Work.objects.create(title='')
)
results = self.connector.search('Anonymous')
self.assertEqual(len(results), 2)
self.assertEqual(results[0].title, 'More Editions')
self.assertEqual(results[1].title, 'Edition of Example Work')
self.assertEqual(len(results), 3)
self.assertEqual(results[0].title, 'Anonymous')
self.assertEqual(results[1].title, 'More Editions')
self.assertEqual(results[2].title, 'Edition of Example Work')
def test_search_default_filter(self):
def test_search_multiple_editions(self):
''' it should get rid of duplicate editions for the same work '''
self.work.default_edition = self.edition
self.work.save()
work = models.Work.objects.create(title='Work Title')
edition_1 = models.Edition.objects.create(
title='Edition 1 Title', parent_work=work)
edition_2 = models.Edition.objects.create(
title='Edition 2 Title', parent_work=work)
edition_3 = models.Edition.objects.create(
title='Fish', parent_work=work)
work.default_edition = edition_2
work.save()
results = self.connector.search('Anonymous')
# pick the best edition
results = self.connector.search('Edition 1 Title')
self.assertEqual(len(results), 1)
self.assertEqual(results[0].title, 'Edition of Example Work')
self.assertEqual(results[0].key, edition_1.remote_id)
# pick the default edition when no match is best
results = self.connector.search('Edition Title')
self.assertEqual(len(results), 1)
self.assertEqual(results[0].key, edition_2.remote_id)
# only matches one edition, so no deduplication takes place
results = self.connector.search('Fish')
self.assertEqual(len(results), 1)
self.assertEqual(results[0].title, 'An Edition')
self.assertEqual(results[0].key, edition_3.remote_id)