rename main code directory

This commit is contained in:
Mouse Reeve
2020-09-17 13:30:54 -07:00
parent b42faad556
commit f77c156733
199 changed files with 0 additions and 0 deletions

View File

@ -0,0 +1,2 @@
''' bring connectors into the namespace '''
from .settings import CONNECTORS

View File

@ -0,0 +1,311 @@
''' functionality outline for a book data connector '''
from abc import ABC, abstractmethod
from dateutil import parser
import pytz
import requests
from django.db import transaction
from fedireads import models
class AbstractConnector(ABC):
    ''' generic book data connector '''

    def __init__(self, identifier):
        # load connector settings from the database
        info = models.Connector.objects.get(identifier=identifier)
        self.connector = info

        # mappings used to deduplicate incoming records against local books
        self.key_mappings = []

        # fields we want to look for in book data to copy over
        # title we handle separately.
        self.book_mappings = []

        # the things in the connector model to copy over
        self_fields = [
            'base_url',
            'books_url',
            'covers_url',
            'search_url',
            'max_query_count',
            'name',
            'identifier',
            'local'
        ]
        for field in self_fields:
            setattr(self, field, getattr(info, field))

    def is_available(self):
        ''' check if you're allowed to use this connector '''
        if self.max_query_count is not None:
            if self.connector.query_count >= self.max_query_count:
                return False
        return True

    def search(self, query):
        ''' free text search against the remote source '''
        resp = requests.get(
            '%s%s' % (self.search_url, query),
            headers={
                'Accept': 'application/json; charset=utf-8',
            },
        )
        if not resp.ok:
            resp.raise_for_status()
        data = resp.json()
        results = []
        # only surface the top ten results
        for doc in self.parse_search_data(data)[:10]:
            results.append(self.format_search_result(doc))
        return results

    def get_or_create_book(self, remote_id):
        ''' pull up a book record by whatever means possible '''
        # try to load the book
        book = models.Book.objects.select_subclasses().filter(
            remote_id=remote_id
        ).first()
        if book:
            if isinstance(book, models.Work):
                return book.default_edition
            return book

        # no book was found, so we start creating a new one
        data = get_data(remote_id)

        work = None
        edition = None
        if self.is_work_data(data):
            work_data = data
            # if we requested a work and there's already an edition, we're set
            work = self.match_from_mappings(work_data, models.Work)
            if work and work.default_edition:
                return work.default_edition

            # no such luck, we need more information.
            try:
                edition_data = self.get_edition_from_work_data(work_data)
            except KeyError:
                # hack: re-use the work data as the edition data
                # this is why remote ids aren't necessarily unique
                edition_data = data
        else:
            edition_data = data
            edition = self.match_from_mappings(edition_data, models.Edition)
            # no need to figure out about the work if we already know about it
            if edition and edition.parent_work:
                return edition

            # no such luck, we need more information.
            try:
                # NOTE(review): "date" here looks like a typo for "data", but
                # renaming the method would break every subclass override
                work_data = self.get_work_from_edition_date(edition_data)
            except KeyError:
                # remember this hack: re-use the work data as the edition data
                work_data = data

        # at this point, we need to figure out the work, edition, or both
        # atomic so that we don't save a work with no edition or vice versa
        with transaction.atomic():
            if not work:
                work_key = work_data.get('url')
                work = self.create_book(work_key, work_data, models.Work)
            if not edition:
                ed_key = edition_data.get('url')
                edition = self.create_book(ed_key, edition_data, models.Edition)
                edition.default = True
                edition.parent_work = work
                edition.save()

        # now's our chance to fill in author gaps.
        # a related manager is always truthy, so the previous
        # "if not edition.authors" check never fired; exists() does the
        # membership test we actually want
        if not edition.authors.exists() and work.authors.exists():
            edition.authors.set(work.authors.all())
            edition.author_text = work.author_text
            edition.save()

        return edition

    def create_book(self, remote_id, data, model):
        ''' create a work or edition from data '''
        book = model.objects.create(
            remote_id=remote_id,
            title=data['title'],
            connector=self.connector,
        )
        return self.update_book_from_data(book, data)

    def update_book_from_data(self, book, data, update_cover=True):
        ''' for creating a new book or syncing with data '''
        book = update_from_mappings(book, data, self.book_mappings)

        for author in self.get_authors_from_data(data):
            book.authors.add(author)
        book.author_text = ', '.join(a.name for a in book.authors.all())
        book.save()

        if not update_cover:
            return book

        cover = self.get_cover_from_data(data)
        if cover:
            book.cover.save(*cover, save=True)
        return book

    def update_book(self, book, data=None):
        ''' load new data '''
        if not book.sync and not book.sync_cover:
            return

        if not data:
            # NOTE(review): self.key_name is not set anywhere in this class
            # and load_book_data is not defined here either — this path
            # appears to rely on subclass attributes; confirm before use
            key = getattr(book, self.key_name)
            data = self.load_book_data(key)

        if book.sync:
            book = self.update_book_from_data(
                book, data, update_cover=book.sync_cover)
        else:
            cover = self.get_cover_from_data(data)
            if cover:
                book.cover.save(*cover, save=True)

        return book

    def match_from_mappings(self, data, model):
        ''' try to find existing copies of this book using various keys '''
        relevant_mappings = [m for m in self.key_mappings if \
                not m.model or model == m.model]
        for mapping in relevant_mappings:
            # check if this field is present in the data
            value = data.get(mapping.remote_field)
            if not value:
                continue

            # extract the value in the right format
            value = mapping.formatter(value)

            # search our database for a matching book
            kwargs = {mapping.local_field: value}
            match = model.objects.filter(**kwargs).first()
            if match:
                return match
        return None

    @abstractmethod
    def is_work_data(self, data):
        ''' differentiate works and editions '''

    @abstractmethod
    def get_edition_from_work_data(self, data):
        ''' every work needs at least one edition '''

    @abstractmethod
    def get_work_from_edition_date(self, data):
        ''' every edition needs a work '''

    @abstractmethod
    def get_authors_from_data(self, data):
        ''' load author data '''

    @abstractmethod
    def get_cover_from_data(self, data):
        ''' load cover '''

    @abstractmethod
    def parse_search_data(self, data):
        ''' turn the result json from a search into a list '''

    @abstractmethod
    def format_search_result(self, search_result):
        ''' create a SearchResult obj from json '''

    @abstractmethod
    def expand_book_data(self, book):
        ''' get more info on a book '''
def update_from_mappings(obj, data, mappings):
    ''' assign data to a model instance according to a list of Mappings

    Fields missing from the data (or falsy) are left untouched.
    Returns the same object, mutated in place. '''
    for mapping in mappings:
        # check if this field is present in the data
        value = data.get(mapping.remote_field)
        if not value:
            continue

        # extract the value in the right format
        value = mapping.formatter(value)

        # assign the formatted value to the model;
        # use the setattr builtin rather than calling the dunder directly
        setattr(obj, mapping.local_field, value)
    return obj
def get_date(date_string):
    ''' helper function to try to interpret dates '''
    # empty or None input means no date at all
    if not date_string:
        return None
    try:
        # parse and mark the result as UTC when the string has no tz info.
        # NOTE: pytz.utc.localize raises ValueError if the parsed datetime
        # is already timezone-aware, which falls through to the next attempt
        return pytz.utc.localize(parser.parse(date_string))
    except ValueError:
        pass
    try:
        # second attempt: keep whatever timezone the string itself carried
        return parser.parse(date_string)
    except ValueError:
        # unparseable date string
        return None
def get_data(url):
    ''' wrapper for request.get '''
    # fetch the url, asking for json back
    response = requests.get(
        url,
        headers={
            'Accept': 'application/json; charset=utf-8',
        },
    )
    # surface http errors as exceptions
    if not response.ok:
        response.raise_for_status()
    return response.json()
class SearchResult:
    ''' standardized search result object '''

    def __init__(self, title, key, author, year):
        # key is the remote identifier used to fetch the full record
        self.title = title
        self.key = key
        self.author = author
        self.year = year

    def __repr__(self):
        return f"<SearchResult key={self.key!r} title={self.title!r} " \
            f"author={self.author!r}>"
class Mapping:
    ''' associate a local database field with a field in an external dataset '''

    def __init__(
            self, local_field, remote_field=None, formatter=None, model=None):
        self.local_field = local_field
        # the remote dataset uses the same field name unless told otherwise
        self.remote_field = remote_field or local_field
        # default formatter passes the value through unchanged
        self.formatter = formatter or (lambda value: value)
        # optionally restrict this mapping to one model class
        self.model = model

View File

@ -0,0 +1,108 @@
''' using another fedireads instance as a source of book data '''
from uuid import uuid4
from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile
import requests
from fedireads import models
from .abstract_connector import AbstractConnector, SearchResult, Mapping
from .abstract_connector import update_from_mappings, get_date, get_data
class Connector(AbstractConnector):
    ''' interact with other instances '''

    def __init__(self, identifier):
        super().__init__(identifier)
        # fields that can identify a book record for deduplication
        self.key_mappings = [
            Mapping('isbn_13', model=models.Edition),
            Mapping('isbn_10', model=models.Edition),
            Mapping('lccn', model=models.Work),
            Mapping('oclc_number', model=models.Edition),
            Mapping('openlibrary_key'),
            Mapping('goodreads_key'),
            Mapping('asin'),
        ]

        # remote fields copied over when creating or syncing a book
        self.book_mappings = self.key_mappings + [
            Mapping('sort_title'),
            Mapping('subtitle'),
            Mapping('description'),
            Mapping('languages'),
            Mapping('series'),
            Mapping('series_number'),
            Mapping('subjects'),
            Mapping('subject_places'),
            Mapping('first_published_date'),
            Mapping('published_date'),
            Mapping('pages'),
            Mapping('physical_format'),
            Mapping('publishers'),
        ]

        # remote fields copied over when creating or syncing an author
        self.author_mappings = [
            Mapping('born', remote_field='birth_date', formatter=get_date),
            Mapping('died', remote_field='death_date', formatter=get_date),
            Mapping('bio'),
        ]

    def is_work_data(self, data):
        ''' differentiate works and editions '''
        return data['book_type'] == 'Work'

    def get_edition_from_work_data(self, data):
        ''' every work needs at least one edition '''
        return data['editions'][0]

    def get_work_from_edition_date(self, data):
        ''' every edition needs a work '''
        return data['work']

    def get_authors_from_data(self, data):
        ''' load or create an author for each referenced remote id '''
        for author_url in data.get('authors', []):
            yield self.get_or_create_author(author_url)

    def get_cover_from_data(self, data):
        ''' download the cover image linked from the record's attachment '''
        cover_data = data.get('attachment')
        if not cover_data:
            return None
        cover_url = cover_data[0].get('url')
        response = requests.get(cover_url)
        if not response.ok:
            response.raise_for_status()

        # include the "." separator so the saved file gets a real extension;
        # the previous concatenation produced names like "<uuid>jpg"
        image_name = '%s.%s' % (uuid4(), cover_url.split('.')[-1])
        image_content = ContentFile(response.content)
        return [image_name, image_content]

    def get_or_create_author(self, remote_id):
        ''' load that author '''
        try:
            return models.Author.objects.get(remote_id=remote_id)
        except ObjectDoesNotExist:
            pass

        data = get_data(remote_id)

        # ingest a new author
        author = models.Author(remote_id=remote_id)
        author = update_from_mappings(author, data, self.author_mappings)
        author.save()

        return author

    def parse_search_data(self, data):
        ''' the remote instance already returns a result list '''
        return data

    def format_search_result(self, search_result):
        ''' the remote instance serves results in SearchResult shape '''
        return SearchResult(**search_result)

    def expand_book_data(self, book):
        ''' get more info on a book '''
        # TODO
        pass

View File

@ -0,0 +1,222 @@
''' openlibrary data connector '''
import re
import requests
from django.core.files.base import ContentFile
from fedireads import models
from .abstract_connector import AbstractConnector, SearchResult, Mapping
from .abstract_connector import update_from_mappings
from .abstract_connector import get_date, get_data
from .openlibrary_languages import languages
class Connector(AbstractConnector):
    ''' instantiate a connector for OL '''

    def __init__(self, identifier):
        super().__init__(identifier)

        get_first = lambda a: a[0]
        # fields that can identify a book record for deduplication;
        # openlibrary serves most identifiers as lists, hence get_first
        self.key_mappings = [
            Mapping('isbn_13', model=models.Edition, formatter=get_first),
            Mapping('isbn_10', model=models.Edition, formatter=get_first),
            Mapping('lccn', model=models.Work, formatter=get_first),
            Mapping(
                'oclc_number',
                remote_field='oclc_numbers',
                model=models.Edition,
                formatter=get_first
            ),
            Mapping(
                'openlibrary_key',
                remote_field='key',
                formatter=get_openlibrary_key
            ),
            Mapping('goodreads_key'),
            Mapping('asin'),
        ]

        # remote fields copied over when creating or syncing a book
        self.book_mappings = self.key_mappings + [
            Mapping('sort_title'),
            Mapping('subtitle'),
            Mapping('description', formatter=get_description),
            Mapping('languages', formatter=get_languages),
            Mapping('series', formatter=get_first),
            Mapping('series_number'),
            Mapping('subjects'),
            Mapping('subject_places'),
            Mapping(
                'first_published_date',
                remote_field='first_publish_date',
                formatter=get_date
            ),
            Mapping(
                'published_date',
                remote_field='publish_date',
                formatter=get_date
            ),
            Mapping(
                'pages',
                model=models.Edition,
                remote_field='number_of_pages'
            ),
            Mapping('physical_format', model=models.Edition),
            Mapping('publishers'),
        ]

        # remote fields copied over when creating or syncing an author
        self.author_mappings = [
            Mapping('born', remote_field='birth_date', formatter=get_date),
            Mapping('died', remote_field='death_date', formatter=get_date),
            Mapping('bio', formatter=get_description),
        ]

    def is_work_data(self, data):
        ''' work keys look like "/works/OL1234W" '''
        return bool(re.match(r'^[\/\w]+OL\d+W$', data['key']))

    def get_edition_from_work_data(self, data):
        ''' pick a default edition for a work '''
        try:
            key = data['key']
        except KeyError:
            return False
        url = '%s/%s/editions' % (self.books_url, key)
        data = get_data(url)
        return pick_default_edition(data['entries'])

    def get_work_from_edition_date(self, data):
        ''' load the work an edition belongs to.
        NOTE(review): "date" looks like a typo for "data", inherited from
        the abstract interface; renaming would break the base contract '''
        try:
            key = data['works'][0]['key']
        except (IndexError, KeyError):
            return False
        url = '%s/%s' % (self.books_url, key)
        return get_data(url)

    def get_authors_from_data(self, data):
        ''' parse author json and load or create authors '''
        for author_blob in data.get('authors', []):
            # in work data the author is nested one level deeper
            author_blob = author_blob.get('author', author_blob)

            # this id is "/authors/OL1234567A" and we want just "OL1234567A"
            author_id = author_blob['key'].split('/')[-1]
            yield self.get_or_create_author(author_id)

    def get_cover_from_data(self, data):
        ''' ask openlibrary for the cover '''
        if not data.get('covers'):
            return None

        cover_id = data.get('covers')[0]
        image_name = '%s-M.jpg' % cover_id
        url = '%s/b/id/%s' % (self.covers_url, image_name)
        response = requests.get(url)
        if not response.ok:
            response.raise_for_status()
        image_content = ContentFile(response.content)
        return [image_name, image_content]

    def parse_search_data(self, data):
        ''' search results are nested under "docs" '''
        return data.get('docs')

    def format_search_result(self, doc):
        ''' create a SearchResult from an openlibrary search doc '''
        # build the remote id from the openlibrary key
        key = self.books_url + doc['key']
        author = doc.get('author_name') or ['Unknown']
        return SearchResult(
            doc.get('title'),
            key,
            ', '.join(author),
            doc.get('first_publish_year'),
        )

    def load_edition_data(self, olkey):
        ''' query openlibrary for editions of a work '''
        url = '%s/works/%s/editions.json' % (self.books_url, olkey)
        return get_data(url)

    def expand_book_data(self, book):
        ''' ingest the remaining editions of a work '''
        work = book
        # go from the edition to the work, if needed
        if isinstance(book, models.Edition):
            work = book.parent_work

        edition_options = self.load_edition_data(work.openlibrary_key)
        for edition_data in edition_options.get('entries'):
            olkey = edition_data.get('key').split('/')[-1]
            # skip editions we already know about; exists() avoids
            # counting every matching row
            if models.Edition.objects.filter(openlibrary_key=olkey).exists():
                continue
            edition = self.create_book(olkey, edition_data, models.Edition)
            edition.parent_work = work
            edition.save()
            # a related manager is always truthy, so the previous
            # "if not edition.authors" check never fired; use exists()
            if not edition.authors.exists() and work.authors.exists():
                edition.authors.set(work.authors.all())

    def get_or_create_author(self, olkey):
        ''' load that author '''
        if not re.match(r'^OL\d+A$', olkey):
            raise ValueError('Invalid OpenLibrary author ID')
        try:
            return models.Author.objects.get(openlibrary_key=olkey)
        except models.Author.DoesNotExist:
            pass

        url = '%s/authors/%s.json' % (self.base_url, olkey)
        data = get_data(url)

        author = models.Author(openlibrary_key=olkey)
        author = update_from_mappings(author, data, self.author_mappings)

        name = data.get('name')
        # TODO this is making some BOLD assumption
        if name:
            author.last_name = name.split(' ')[-1]
            author.first_name = ' '.join(name.split(' ')[:-1])
        author.save()

        return author
def get_description(description_blob):
    ''' descriptions can be a string or a dict '''
    # plain strings pass straight through
    if not isinstance(description_blob, dict):
        return description_blob
    # dict-form descriptions keep the text under "value"
    return description_blob.get('value')
def get_openlibrary_key(key):
    ''' convert /books/OL27320736M into OL27320736M '''
    # everything after the final slash is the bare identifier
    _, _, identifier = key.rpartition('/')
    return identifier
def get_languages(language_blob):
    ''' /language/eng -> English '''
    # map each openlibrary language ref to a human-readable name;
    # unknown keys yield None so positions line up with the input.
    # comprehension replaces the previous manual append loop
    return [
        languages.get(lang.get('key', ''), None)
        for lang in language_blob
    ]
def pick_default_edition(options):
    ''' favor physical copies with covers in english '''
    if not options:
        return None
    if len(options) == 1:
        return options[0]

    physical_formats = ['paperback', 'hardcover', 'mass market paperback']
    # preferences in priority order; each filter only applies when it
    # leaves at least one candidate standing
    preferences = [
        lambda e: e.get('cover'),
        lambda e: '/languages/eng' in str(e.get('languages')),
        lambda e: str(e.get('physical_format')).lower() in physical_formats,
        lambda e: e.get('isbn_13'),
        lambda e: e.get('ocaid'),
    ]
    for prefer in preferences:
        options = [edition for edition in options if prefer(edition)] or options
    return options[0]

View File

@ -0,0 +1,467 @@
''' key lookups for openlibrary languages '''
# maps openlibrary language keys to English display names.
# fixes applied: 'Scottish Gaelix' typo, placeholder value 'gsw',
# missing value for 'gaa', and stray trailing spaces in several names
languages = {
    '/languages/eng': 'English',
    '/languages/fre': 'French',
    '/languages/spa': 'Spanish',
    '/languages/ger': 'German',
    '/languages/rus': 'Russian',
    '/languages/ita': 'Italian',
    '/languages/chi': 'Chinese',
    '/languages/jpn': 'Japanese',
    '/languages/por': 'Portuguese',
    '/languages/ara': 'Arabic',
    '/languages/pol': 'Polish',
    '/languages/heb': 'Hebrew',
    '/languages/kor': 'Korean',
    '/languages/dut': 'Dutch',
    '/languages/ind': 'Indonesian',
    '/languages/lat': 'Latin',
    '/languages/und': 'Undetermined',
    '/languages/cmn': 'Mandarin',
    '/languages/hin': 'Hindi',
    '/languages/swe': 'Swedish',
    '/languages/dan': 'Danish',
    '/languages/urd': 'Urdu',
    '/languages/hun': 'Hungarian',
    '/languages/cze': 'Czech',
    '/languages/tur': 'Turkish',
    '/languages/ukr': 'Ukrainian',
    '/languages/gre': 'Greek',
    '/languages/vie': 'Vietnamese',
    '/languages/bul': 'Bulgarian',
    '/languages/ben': 'Bengali',
    '/languages/rum': 'Romanian',
    '/languages/cat': 'Catalan',
    '/languages/nor': 'Norwegian',
    '/languages/tha': 'Thai',
    '/languages/per': 'Persian',
    '/languages/scr': 'Croatian',
    '/languages/mul': 'Multiple languages',
    '/languages/fin': 'Finnish',
    '/languages/tam': 'Tamil',
    '/languages/guj': 'Gujarati',
    '/languages/mar': 'Marathi',
    '/languages/scc': 'Serbian',
    '/languages/pan': 'Panjabi',
    '/languages/wel': 'Welsh',
    '/languages/tel': 'Telugu',
    '/languages/yid': 'Yiddish',
    '/languages/kan': 'Kannada',
    '/languages/slo': 'Slovak',
    '/languages/san': 'Sanskrit',
    '/languages/arm': 'Armenian',
    '/languages/mal': 'Malayalam',
    '/languages/may': 'Malay',
    '/languages/bur': 'Burmese',
    '/languages/slv': 'Slovenian',
    '/languages/lit': 'Lithuanian',
    '/languages/tib': 'Tibetan',
    '/languages/lav': 'Latvian',
    '/languages/est': 'Estonian',
    '/languages/nep': 'Nepali',
    '/languages/ori': 'Oriya',
    '/languages/mon': 'Mongolian',
    '/languages/alb': 'Albanian',
    '/languages/iri': 'Irish',
    '/languages/geo': 'Georgian',
    '/languages/afr': 'Afrikaans',
    '/languages/grc': 'Ancient Greek',
    '/languages/mac': 'Macedonian',
    '/languages/bel': 'Belarusian',
    '/languages/ice': 'Icelandic',
    '/languages/srp': 'Serbian',
    '/languages/snh': 'Sinhalese',
    '/languages/snd': 'Sindhi',
    '/languages/ota': 'Turkish, Ottoman',
    '/languages/kur': 'Kurdish',
    '/languages/aze': 'Azerbaijani',
    '/languages/pus': 'Pushto',
    '/languages/amh': 'Amharic',
    '/languages/gag': 'Galician',
    '/languages/hrv': 'Croatian',
    '/languages/sin': 'Sinhalese',
    '/languages/asm': 'Assamese',
    '/languages/uzb': 'Uzbek',
    '/languages/gae': 'Scottish Gaelic',
    '/languages/kaz': 'Kazakh',
    '/languages/swa': 'Swahili',
    '/languages/bos': 'Bosnian',
    '/languages/glg': 'Galician',
    '/languages/baq': 'Basque',
    '/languages/tgl': 'Tagalog',
    '/languages/raj': 'Rajasthani',
    '/languages/gle': 'Irish',
    '/languages/lao': 'Lao',
    '/languages/jav': 'Javanese',
    '/languages/mai': 'Maithili',
    '/languages/tgk': 'Tajik',
    '/languages/khm': 'Khmer',
    '/languages/roh': 'Raeto-Romance',
    '/languages/kok': 'Konkani',
    '/languages/sit': 'Sino-Tibetan (Other)',
    '/languages/mol': 'Moldavian',
    '/languages/kir': 'Kyrgyz',
    '/languages/new': 'Newari',
    '/languages/inc': 'Indic (Other)',
    '/languages/frm': 'French, Middle (ca. 1300-1600)',
    '/languages/esp': 'Esperanto',
    '/languages/hau': 'Hausa',
    '/languages/tag': 'Tagalog',
    '/languages/tuk': 'Turkmen',
    '/languages/enm': 'English, Middle (1100-1500)',
    '/languages/map': 'Austronesian (Other)',
    '/languages/pli': 'Pali',
    '/languages/fro': 'French, Old (ca. 842-1300)',
    '/languages/nic': 'Niger-Kordofanian (Other)',
    '/languages/tir': 'Tigrinya',
    '/languages/wen': 'Sorbian (Other)',
    '/languages/bho': 'Bhojpuri',
    '/languages/roa': 'Romance (Other)',
    '/languages/tut': 'Altaic (Other)',
    '/languages/bra': 'Braj',
    '/languages/sun': 'Sundanese',
    '/languages/fiu': 'Finno-Ugrian (Other)',
    '/languages/far': 'Faroese',
    '/languages/ban': 'Balinese',
    '/languages/tar': 'Tatar',
    '/languages/bak': 'Bashkir',
    '/languages/tat': 'Tatar',
    '/languages/chu': 'Church Slavic',
    '/languages/dra': 'Dravidian (Other)',
    '/languages/pra': 'Prakrit languages',
    '/languages/paa': 'Papuan (Other)',
    '/languages/doi': 'Dogri',
    '/languages/lah': 'Lahndā',
    '/languages/mni': 'Manipuri',
    '/languages/yor': 'Yoruba',
    '/languages/gmh': 'German, Middle High (ca. 1050-1500)',
    '/languages/kas': 'Kashmiri',
    '/languages/fri': 'Frisian',
    '/languages/mla': 'Malagasy',
    '/languages/egy': 'Egyptian',
    '/languages/rom': 'Romani',
    '/languages/syr': 'Syriac, Modern',
    '/languages/cau': 'Caucasian (Other)',
    '/languages/hbs': 'Serbo-Croatian',
    '/languages/sai': 'South American Indian (Other)',
    '/languages/pro': 'Provençal (to 1500)',
    '/languages/cpf': 'Creoles and Pidgins, French-based (Other)',
    '/languages/ang': 'English, Old (ca. 450-1100)',
    '/languages/bal': 'Baluchi',
    '/languages/gla': 'Scottish Gaelic',
    '/languages/chv': 'Chuvash',
    '/languages/kin': 'Kinyarwanda',
    '/languages/zul': 'Zulu',
    '/languages/sla': 'Slavic (Other)',
    '/languages/som': 'Somali',
    '/languages/mlt': 'Maltese',
    '/languages/uig': 'Uighur',
    '/languages/mlg': 'Malagasy',
    '/languages/sho': 'Shona',
    '/languages/lan': 'Occitan (post 1500)',
    '/languages/bre': 'Breton',
    '/languages/sco': 'Scots',
    '/languages/sso': 'Sotho',
    '/languages/myn': 'Mayan languages',
    '/languages/xho': 'Xhosa',
    '/languages/gem': 'Germanic (Other)',
    '/languages/esk': 'Eskimo languages',
    '/languages/akk': 'Akkadian',
    '/languages/div': 'Maldivian',
    '/languages/sah': 'Yakut',
    '/languages/tsw': 'Tswana',
    '/languages/nso': 'Northern Sotho',
    '/languages/pap': 'Papiamento',
    '/languages/bnt': 'Bantu (Other)',
    '/languages/oss': 'Ossetic',
    '/languages/cre': 'Cree',
    '/languages/ibo': 'Igbo',
    '/languages/fao': 'Faroese',
    '/languages/nai': 'North American Indian (Other)',
    '/languages/mag': 'Magahi',
    '/languages/arc': 'Aramaic',
    '/languages/epo': 'Esperanto',
    '/languages/kha': 'Khasi',
    '/languages/oji': 'Ojibwa',
    '/languages/que': 'Quechua',
    '/languages/lug': 'Ganda',
    '/languages/mwr': 'Marwari',
    '/languages/awa': 'Awadhi',
    '/languages/cor': 'Cornish',
    '/languages/lad': 'Ladino',
    '/languages/dzo': 'Dzongkha',
    '/languages/cop': 'Coptic',
    '/languages/nah': 'Nahuatl',
    '/languages/cai': 'Central American Indian (Other)',
    '/languages/phi': 'Philippine (Other)',
    '/languages/moh': 'Mohawk',
    '/languages/crp': 'Creoles and Pidgins (Other)',
    '/languages/nya': 'Nyanja',
    '/languages/wol': 'Wolof',
    '/languages/haw': 'Hawaiian',
    '/languages/eth': 'Ethiopic',
    '/languages/mis': 'Miscellaneous languages',
    '/languages/mkh': 'Mon-Khmer (Other)',
    '/languages/alg': 'Algonquian (Other)',
    '/languages/nde': 'Ndebele (Zimbabwe)',
    '/languages/ssa': 'Nilo-Saharan (Other)',
    '/languages/chm': 'Mari',
    '/languages/che': 'Chechen',
    '/languages/gez': 'Ethiopic',
    '/languages/ven': 'Venda',
    '/languages/cam': 'Khmer',
    '/languages/fur': 'Friulian',
    '/languages/ful': 'Fula',
    '/languages/gal': 'Oromo',
    '/languages/jrb': 'Judeo-Arabic',
    '/languages/bua': 'Buriat',
    '/languages/ady': 'Adygei',
    '/languages/bem': 'Bemba',
    '/languages/kar': 'Karen languages',
    '/languages/sna': 'Shona',
    '/languages/twi': 'Twi',
    '/languages/btk': 'Batak',
    '/languages/kaa': 'Kara-Kalpak',
    '/languages/kom': 'Komi',
    '/languages/sot': 'Sotho',
    '/languages/tso': 'Tsonga',
    '/languages/cpe': 'Creoles and Pidgins, English-based (Other)',
    '/languages/gua': 'Guarani',
    '/languages/mao': 'Maori',
    '/languages/mic': 'Micmac',
    '/languages/swz': 'Swazi',
    '/languages/taj': 'Tajik',
    '/languages/smo': 'Samoan',
    '/languages/ace': 'Achinese',
    '/languages/afa': 'Afroasiatic (Other)',
    '/languages/lap': 'Sami',
    '/languages/min': 'Minangkabau',
    '/languages/oci': 'Occitan (post 1500)',
    '/languages/tsn': 'Tswana',
    '/languages/pal': 'Pahlavi',
    '/languages/sux': 'Sumerian',
    '/languages/ewe': 'Ewe',
    '/languages/him': 'Himachali',
    '/languages/kaw': 'Kawi',
    '/languages/lus': 'Lushai',
    '/languages/ceb': 'Cebuano',
    '/languages/chr': 'Cherokee',
    '/languages/fil': 'Filipino',
    '/languages/ndo': 'Ndonga',
    '/languages/ilo': 'Iloko',
    '/languages/kbd': 'Kabardian',
    '/languages/orm': 'Oromo',
    '/languages/dum': 'Dutch, Middle (ca. 1050-1350)',
    '/languages/bam': 'Bambara',
    '/languages/goh': 'Old High German',
    '/languages/got': 'Gothic',
    '/languages/kon': 'Kongo',
    '/languages/mun': 'Munda (Other)',
    '/languages/kru': 'Kurukh',
    '/languages/pam': 'Pampanga',
    '/languages/grn': 'Guarani',
    '/languages/gaa': 'Ga',
    '/languages/fry': 'Frisian',
    '/languages/iba': 'Iban',
    '/languages/mak': 'Makasar',
    '/languages/kik': 'Kikuyu',
    '/languages/cho': 'Choctaw',
    '/languages/cpp': 'Creoles and Pidgins, Portuguese-based (Other)',
    '/languages/dak': 'Dakota',
    '/languages/udm': 'Udmurt',
    '/languages/hat': 'Haitian French Creole',
    '/languages/mus': 'Creek',
    '/languages/ber': 'Berber (Other)',
    '/languages/hil': 'Hiligaynon',
    '/languages/iro': 'Iroquoian (Other)',
    '/languages/kua': 'Kuanyama',
    '/languages/mno': 'Manobo languages',
    '/languages/run': 'Rundi',
    '/languages/sat': 'Santali',
    '/languages/shn': 'Shan',
    '/languages/tyv': 'Tuvinian',
    '/languages/chg': 'Chagatai',
    '/languages/syc': 'Syriac',
    '/languages/ath': 'Athapascan (Other)',
    '/languages/aym': 'Aymara',
    '/languages/bug': 'Bugis',
    '/languages/cel': 'Celtic (Other)',
    '/languages/int': 'Interlingua (International Auxiliary Language Association)',
    '/languages/xal': 'Oirat',
    '/languages/ava': 'Avaric',
    '/languages/son': 'Songhai',
    '/languages/tah': 'Tahitian',
    '/languages/tet': 'Tetum',
    '/languages/ira': 'Iranian (Other)',
    '/languages/kac': 'Kachin',
    '/languages/nob': 'Norwegian (Bokmål)',
    '/languages/vai': 'Vai',
    '/languages/bik': 'Bikol',
    '/languages/mos': 'Mooré',
    '/languages/tig': 'Tigré',
    '/languages/fat': 'Fanti',
    '/languages/her': 'Herero',
    '/languages/kal': 'Kalâtdlisut',
    '/languages/mad': 'Madurese',
    '/languages/yue': 'Cantonese',
    '/languages/chn': 'Chinook jargon',
    '/languages/hmn': 'Hmong',
    '/languages/lin': 'Lingala',
    '/languages/man': 'Mandingo',
    '/languages/nds': 'Low German',
    '/languages/bas': 'Basa',
    '/languages/gay': 'Gayo',
    '/languages/gsw': 'Swiss German',
    '/languages/ine': 'Indo-European (Other)',
    '/languages/kro': 'Kru (Other)',
    '/languages/kum': 'Kumyk',
    '/languages/tsi': 'Tsimshian',
    '/languages/zap': 'Zapotec',
    '/languages/ach': 'Acoli',
    '/languages/ada': 'Adangme',
    '/languages/aka': 'Akan',
    '/languages/khi': 'Khoisan (Other)',
    '/languages/srd': 'Sardinian',
    '/languages/arn': 'Mapuche',
    '/languages/dyu': 'Dyula',
    '/languages/loz': 'Lozi',
    '/languages/ltz': 'Luxembourgish',
    '/languages/sag': 'Sango (Ubangi Creole)',
    '/languages/lez': 'Lezgian',
    '/languages/luo': 'Luo (Kenya and Tanzania)',
    '/languages/ssw': 'Swazi',
    '/languages/krc': 'Karachay-Balkar',
    '/languages/nyn': 'Nyankole',
    '/languages/sal': 'Salishan languages',
    '/languages/jpr': 'Judeo-Persian',
    '/languages/pau': 'Palauan',
    '/languages/smi': 'Sami',
    '/languages/aar': 'Afar',
    '/languages/abk': 'Abkhaz',
    '/languages/gon': 'Gondi',
    '/languages/nzi': 'Nzima',
    '/languages/sam': 'Samaritan Aramaic',
    '/languages/sao': 'Samoan',
    '/languages/srr': 'Serer',
    '/languages/apa': 'Apache languages',
    '/languages/crh': 'Crimean Tatar',
    '/languages/efi': 'Efik',
    '/languages/iku': 'Inuktitut',
    '/languages/nav': 'Navajo',
    '/languages/pon': 'Ponape',
    '/languages/tmh': 'Tamashek',
    '/languages/aus': 'Australian languages',
    '/languages/oto': 'Otomian languages',
    '/languages/war': 'Waray',
    '/languages/ypk': 'Yupik languages',
    '/languages/ave': 'Avestan',
    '/languages/cus': 'Cushitic (Other)',
    '/languages/del': 'Delaware',
    '/languages/fon': 'Fon',
    '/languages/ina': 'Interlingua (International Auxiliary Language Association)',
    '/languages/myv': 'Erzya',
    '/languages/pag': 'Pangasinan',
    '/languages/peo': 'Old Persian (ca. 600-400 B.C.)',
    '/languages/vls': 'Flemish',
    '/languages/bai': 'Bamileke languages',
    '/languages/bla': 'Siksika',
    '/languages/day': 'Dayak',
    '/languages/men': 'Mende',
    '/languages/tai': 'Tai',
    '/languages/ton': 'Tongan',
    '/languages/uga': 'Ugaritic',
    '/languages/yao': 'Yao (Africa)',
    '/languages/zza': 'Zaza',
    '/languages/bin': 'Edo',
    '/languages/frs': 'East Frisian',
    '/languages/inh': 'Ingush',
    '/languages/mah': 'Marshallese',
    '/languages/sem': 'Semitic (Other)',
    '/languages/art': 'Artificial (Other)',
    '/languages/chy': 'Cheyenne',
    '/languages/cmc': 'Chamic languages',
    '/languages/dar': 'Dargwa',
    '/languages/dua': 'Duala',
    '/languages/elx': 'Elamite',
    '/languages/fan': 'Fang',
    '/languages/fij': 'Fijian',
    '/languages/gil': 'Gilbertese',
    '/languages/ijo': 'Ijo',
    '/languages/kam': 'Kamba',
    '/languages/nog': 'Nogai',
    '/languages/non': 'Old Norse',
    '/languages/tem': 'Temne',
    '/languages/arg': 'Aragonese',
    '/languages/arp': 'Arapaho',
    '/languages/arw': 'Arawak',
    '/languages/din': 'Dinka',
    '/languages/grb': 'Grebo',
    '/languages/kos': 'Kusaie',
    '/languages/lub': 'Luba-Katanga',
    '/languages/mnc': 'Manchu',
    '/languages/nyo': 'Nyoro',
    '/languages/rar': 'Rarotongan',
    '/languages/sel': 'Selkup',
    '/languages/tkl': 'Tokelauan',
    '/languages/tog': 'Tonga (Nyasa)',
    '/languages/tum': 'Tumbuka',
    '/languages/alt': 'Altai',
    '/languages/ase': 'American Sign Language',
    '/languages/ast': 'Asturian',
    '/languages/chk': 'Chuukese',
    '/languages/cos': 'Corsican',
    '/languages/ewo': 'Ewondo',
    '/languages/gor': 'Gorontalo',
    '/languages/hmo': 'Hiri Motu',
    '/languages/lol': 'Mongo-Nkundu',
    '/languages/lun': 'Lunda',
    '/languages/mas': 'Masai',
    '/languages/niu': 'Niuean',
    '/languages/rup': 'Aromanian',
    '/languages/sas': 'Sasak',
    '/languages/sio': 'Siouan (Other)',
    '/languages/sus': 'Susu',
    '/languages/zun': 'Zuni',
    '/languages/bat': 'Baltic (Other)',
    '/languages/car': 'Carib',
    '/languages/cha': 'Chamorro',
    '/languages/kab': 'Kabyle',
    '/languages/kau': 'Kanuri',
    '/languages/kho': 'Khotanese',
    '/languages/lua': 'Luba-Lulua',
    '/languages/mdf': 'Moksha',
    '/languages/nbl': 'Ndebele (South Africa)',
    '/languages/umb': 'Umbundu',
    '/languages/wak': 'Wakashan languages',
    '/languages/wal': 'Wolayta',
    '/languages/ale': 'Aleut',
    '/languages/bis': 'Bislama',
    '/languages/gba': 'Gbaya',
    '/languages/glv': 'Manx',
    '/languages/gul': 'Gullah',
    '/languages/ipk': 'Inupiaq',
    '/languages/krl': 'Karelian',
    '/languages/lam': 'Lamba (Zambia and Congo)',
    '/languages/sad': 'Sandawe',
    '/languages/sid': 'Sidamo',
    '/languages/snk': 'Soninke',
    '/languages/srn': 'Sranan',
    '/languages/suk': 'Sukuma',
    '/languages/ter': 'Terena',
    '/languages/tiv': 'Tiv',
    '/languages/tli': 'Tlingit',
    '/languages/tpi': 'Tok Pisin',
    '/languages/tvl': 'Tuvaluan',
    '/languages/yap': 'Yapese',
    '/languages/eka': 'Ekajuk',
    '/languages/hsb': 'Upper Sorbian',
    '/languages/ido': 'Ido',
    '/languages/kmb': 'Kimbundu',
    '/languages/kpe': 'Kpelle',
    '/languages/mwl': 'Mirandese',
    '/languages/nno': 'Nynorsk',
    '/languages/nub': 'Nubian languages',
    '/languages/osa': 'Osage',
    '/languages/sme': 'Northern Sami',
    '/languages/znd': 'Zande languages',
}

View File

@ -0,0 +1,81 @@
''' using a fedireads instance as a source of book data '''
from django.contrib.postgres.search import SearchRank, SearchVector
from fedireads import models
from .abstract_connector import AbstractConnector, SearchResult
class Connector(AbstractConnector):
    ''' instantiate a connector '''
    # the base class __init__ needs no customization here, so the redundant
    # override that only called super().__init__ has been removed

    def search(self, query):
        ''' right now you can't search fedireads sorry, but when
        that gets implemented it will totally rule '''
        vector = SearchVector('title', weight='A') +\
            SearchVector('subtitle', weight='B') +\
            SearchVector('author_text', weight='A') +\
            SearchVector('isbn_13', weight='A') +\
            SearchVector('isbn_10', weight='A') +\
            SearchVector('openlibrary_key', weight='B') +\
            SearchVector('goodreads_key', weight='B') +\
            SearchVector('asin', weight='B') +\
            SearchVector('oclc_number', weight='B') +\
            SearchVector('remote_id', weight='B') +\
            SearchVector('description', weight='C') +\
            SearchVector('series', weight='C')

        results = models.Edition.objects.annotate(
            search=vector
        ).annotate(
            rank=SearchRank(vector, query)
        ).filter(
            rank__gt=0
        ).order_by('-rank')

        # prefer default editions, but fall back to everything if none match
        results = results.filter(default=True) or results

        search_results = []
        # only surface the top ten results
        for book in results[:10]:
            search_results.append(
                self.format_search_result(book)
            )
        return search_results

    def format_search_result(self, book):
        ''' create a SearchResult from a local edition '''
        return SearchResult(
            book.title,
            book.local_id,
            book.author_text,
            book.published_date.year if book.published_date else None,
        )

    def get_or_create_book(self, remote_id):
        ''' this COULD be semi-implemented but I think it shouldn't be used '''
        pass

    def is_work_data(self, data):
        ''' not used in a local connector '''
        pass

    def get_edition_from_work_data(self, data):
        ''' not used in a local connector '''
        pass

    def get_work_from_edition_date(self, data):
        ''' not used in a local connector '''
        pass

    def get_authors_from_data(self, data):
        ''' local books already have authors '''
        return None

    def get_cover_from_data(self, data):
        ''' local books already have covers '''
        return None

    def parse_search_data(self, data):
        ''' it's already in the right format, don't even worry about it '''
        return data

    def expand_book_data(self, book):
        ''' local books don't have remote editions to ingest '''
        pass

View File

@ -0,0 +1,3 @@
''' settings for book data connectors '''
# names of the connector modules available in this package
CONNECTORS = ['openlibrary', 'self_connector', 'fedireads_connector']