Runs black

This commit is contained in:
Mouse Reeve
2021-03-08 08:49:10 -08:00
parent a07f955781
commit 70296e760b
198 changed files with 10239 additions and 8572 deletions

View File

@ -1,4 +1,4 @@
''' bring connectors into the namespace '''
""" bring connectors into the namespace """
from .settings import CONNECTORS
from .abstract_connector import ConnectorException
from .abstract_connector import get_data, get_image

View File

@ -1,4 +1,4 @@
''' functionality outline for a book data connector '''
""" functionality outline for a book data connector """
from abc import ABC, abstractmethod
from dataclasses import asdict, dataclass
import logging
@ -13,8 +13,11 @@ from .connector_manager import load_more_data, ConnectorException
logger = logging.getLogger(__name__)
class AbstractMinimalConnector(ABC):
''' just the bare bones, for other bookwyrm instances '''
""" just the bare bones, for other bookwyrm instances """
def __init__(self, identifier):
# load connector settings
info = models.Connector.objects.get(identifier=identifier)
@ -22,31 +25,31 @@ class AbstractMinimalConnector(ABC):
# the things in the connector model to copy over
self_fields = [
'base_url',
'books_url',
'covers_url',
'search_url',
'isbn_search_url',
'max_query_count',
'name',
'identifier',
'local'
"base_url",
"books_url",
"covers_url",
"search_url",
"isbn_search_url",
"max_query_count",
"name",
"identifier",
"local",
]
for field in self_fields:
setattr(self, field, getattr(info, field))
def search(self, query, min_confidence=None):
''' free text search '''
""" free text search """
params = {}
if min_confidence:
params['min_confidence'] = min_confidence
params["min_confidence"] = min_confidence
resp = requests.get(
'%s%s' % (self.search_url, query),
"%s%s" % (self.search_url, query),
params=params,
headers={
'Accept': 'application/json; charset=utf-8',
'User-Agent': settings.USER_AGENT,
"Accept": "application/json; charset=utf-8",
"User-Agent": settings.USER_AGENT,
},
)
if not resp.ok:
@ -55,7 +58,7 @@ class AbstractMinimalConnector(ABC):
data = resp.json()
except ValueError as e:
logger.exception(e)
raise ConnectorException('Unable to parse json response', e)
raise ConnectorException("Unable to parse json response", e)
results = []
for doc in self.parse_search_data(data)[:10]:
@ -63,14 +66,14 @@ class AbstractMinimalConnector(ABC):
return results
def isbn_search(self, query):
''' isbn search '''
""" isbn search """
params = {}
resp = requests.get(
'%s%s' % (self.isbn_search_url, query),
"%s%s" % (self.isbn_search_url, query),
params=params,
headers={
'Accept': 'application/json; charset=utf-8',
'User-Agent': settings.USER_AGENT,
"Accept": "application/json; charset=utf-8",
"User-Agent": settings.USER_AGENT,
},
)
if not resp.ok:
@ -79,7 +82,7 @@ class AbstractMinimalConnector(ABC):
data = resp.json()
except ValueError as e:
logger.exception(e)
raise ConnectorException('Unable to parse json response', e)
raise ConnectorException("Unable to parse json response", e)
results = []
for doc in self.parse_isbn_search_data(data):
@ -88,49 +91,49 @@ class AbstractMinimalConnector(ABC):
@abstractmethod
def get_or_create_book(self, remote_id):
    """pull up a book record by whatever means possible

    Abstract hook: the body is only this docstring, so every concrete
    connector has to provide its own implementation.
    """
@abstractmethod
def parse_search_data(self, data):
    """turn the result json from a search into a list

    Abstract hook; ``search`` slices and iterates whatever this returns.
    """
@abstractmethod
def format_search_result(self, search_result):
    """create a SearchResult obj from json

    Abstract hook; receives one entry of the parsed search data.
    """
@abstractmethod
def parse_isbn_search_data(self, data):
    """turn the result json from an isbn search into a list

    Abstract hook; ``isbn_search`` iterates whatever this returns.
    """
@abstractmethod
def format_isbn_search_result(self, search_result):
    """create a SearchResult obj from isbn search json

    Abstract hook; receives one entry of the parsed isbn search data.
    """
class AbstractConnector(AbstractMinimalConnector):
''' generic book data connector '''
""" generic book data connector """
def __init__(self, identifier):
    """run the parent connector setup, then start with no book mappings"""
    super().__init__(identifier)
    # fields we want to look for in book data to copy over
    # title we handle separately.
    self.book_mappings = []
def is_available(self):
    """check if you're allowed to use this connector

    Returns True when no ``max_query_count`` is set, otherwise True only
    while the connector's ``query_count`` is still below that limit.
    """
    if self.max_query_count is None:
        # no limit configured
        return True
    return not self.connector.query_count >= self.max_query_count
def get_or_create_book(self, remote_id):
''' translate arbitrary json into an Activitypub dataclass '''
""" translate arbitrary json into an Activitypub dataclass """
# first, check if we have the origin_id saved
existing = models.Edition.find_existing_by_remote_id(remote_id) or \
models.Work.find_existing_by_remote_id(remote_id)
existing = models.Edition.find_existing_by_remote_id(
remote_id
) or models.Work.find_existing_by_remote_id(remote_id)
if existing:
if hasattr(existing, 'get_default_editon'):
if hasattr(existing, "get_default_editon"):
return existing.get_default_editon()
return existing
@ -154,7 +157,7 @@ class AbstractConnector(AbstractMinimalConnector):
edition_data = data
if not work_data or not edition_data:
raise ConnectorException('Unable to load book data: %s' % remote_id)
raise ConnectorException("Unable to load book data: %s" % remote_id)
with transaction.atomic():
# create activitypub object
@ -168,11 +171,10 @@ class AbstractConnector(AbstractMinimalConnector):
load_more_data.delay(self.connector.id, work.id)
return edition
def create_edition_from_data(self, work, edition_data):
''' if we already have the work, we're ready '''
""" if we already have the work, we're ready """
mapped_data = dict_from_mappings(edition_data, self.book_mappings)
mapped_data['work'] = work.remote_id
mapped_data["work"] = work.remote_id
edition_activity = activitypub.Edition(**mapped_data)
edition = edition_activity.to_model(model=models.Edition)
edition.connector = self.connector
@ -189,9 +191,8 @@ class AbstractConnector(AbstractMinimalConnector):
return edition
def get_or_create_author(self, remote_id):
''' load that author '''
""" load that author """
existing = models.Author.find_existing_by_remote_id(remote_id)
if existing:
return existing
@ -203,31 +204,30 @@ class AbstractConnector(AbstractMinimalConnector):
# this will dedupe
return activity.to_model(model=models.Author)
@abstractmethod
def is_work_data(self, data):
    """differentiate works and editions

    Abstract hook; the body is only this docstring.
    """
@abstractmethod
def get_edition_from_work_data(self, data):
    """every work needs at least one edition

    Abstract hook; the body is only this docstring.
    """
@abstractmethod
def get_work_from_edition_data(self, data):
    """every edition needs a work

    Abstract hook; the body is only this docstring.
    """
@abstractmethod
def get_authors_from_data(self, data):
    """load author data

    Abstract hook; the body is only this docstring.
    """
@abstractmethod
def expand_book_data(self, book):
    """get more info on a book

    Abstract hook; the body is only this docstring.
    """
def dict_from_mappings(data, mappings):
''' create a dict in Activitypub format, using mappings supplied by
the subclass '''
"""create a dict in Activitypub format, using mappings supplied by
the subclass"""
result = {}
for mapping in mappings:
result[mapping.local_field] = mapping.get_value(data)
@ -235,13 +235,13 @@ def dict_from_mappings(data, mappings):
def get_data(url):
''' wrapper for request.get '''
""" wrapper for request.get """
try:
resp = requests.get(
url,
headers={
'Accept': 'application/json; charset=utf-8',
'User-Agent': settings.USER_AGENT,
"Accept": "application/json; charset=utf-8",
"User-Agent": settings.USER_AGENT,
},
)
except (RequestError, SSLError) as e:
@ -260,12 +260,12 @@ def get_data(url):
def get_image(url):
''' wrapper for requesting an image '''
""" wrapper for requesting an image """
try:
resp = requests.get(
url,
headers={
'User-Agent': settings.USER_AGENT,
"User-Agent": settings.USER_AGENT,
},
)
except (RequestError, SSLError) as e:
@ -278,7 +278,8 @@ def get_image(url):
@dataclass
class SearchResult:
''' standardized search result object '''
""" standardized search result object """
title: str
key: str
author: str
@ -288,17 +289,19 @@ class SearchResult:
def __repr__(self):
return "<SearchResult key={!r} title={!r} author={!r}>".format(
self.key, self.title, self.author)
self.key, self.title, self.author
)
def json(self):
    """serialize a search result for json response

    Returns the dataclass fields as a dict with the ``connector`` entry
    removed.
    """
    serialized = asdict(self)
    del serialized["connector"]
    return serialized
class Mapping:
''' associate a local database field with a field in an external dataset '''
""" associate a local database field with a field in an external dataset """
def __init__(self, local_field, remote_field=None, formatter=None):
noop = lambda x: x
@ -307,11 +310,11 @@ class Mapping:
self.formatter = formatter or noop
def get_value(self, data):
    """pull a field from incoming json and return the formatted version

    Reads ``self.remote_field`` from ``data``. Returns None when the value
    is missing or falsy, or when ``self.formatter`` raises on it.
    """
    value = data.get(self.remote_field)
    if not value:
        return None
    try:
        return self.formatter(value)
    # best effort: an unformattable value is treated as absent, but
    # KeyboardInterrupt / SystemExit are no longer swallowed
    except Exception:  # pylint: disable=broad-except
        return None

View File

@ -1,10 +1,10 @@
''' using another bookwyrm instance as a source of book data '''
""" using another bookwyrm instance as a source of book data """
from bookwyrm import activitypub, models
from .abstract_connector import AbstractMinimalConnector, SearchResult
class Connector(AbstractMinimalConnector):
''' this is basically just for search '''
""" this is basically just for search """
def get_or_create_book(self, remote_id):
edition = activitypub.resolve_remote_id(remote_id, model=models.Edition)
@ -17,13 +17,12 @@ class Connector(AbstractMinimalConnector):
return data
def format_search_result(self, search_result):
    """build a SearchResult from a result dict, tagging it with this connector"""
    # note: the incoming dict is modified in place before being unpacked
    search_result["connector"] = self
    return SearchResult(**search_result)
def parse_isbn_search_data(self, data):
    """isbn search data is passed through unchanged"""
    return data
def format_isbn_search_result(self, search_result):
    """build a SearchResult from an isbn result dict, tagging it with this connector"""
    # note: the incoming dict is modified in place before being unpacked
    search_result["connector"] = self
    return SearchResult(**search_result)

View File

@ -1,4 +1,4 @@
''' interface with whatever connectors the app has '''
""" interface with whatever connectors the app has """
import importlib
import re
from urllib.parse import urlparse
@ -10,24 +10,24 @@ from bookwyrm.tasks import app
class ConnectorException(HTTPError):
    """when the connector can't do what was asked"""
def search(query, min_confidence=0.1):
''' find books based on arbitrary keywords '''
""" find books based on arbitrary keywords """
results = []
# Have we got an ISBN?
isbn = re.sub('[\W_]', '', query)
maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13
isbn = re.sub("[\W_]", "", query)
maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13
dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year)
dedup_slug = lambda r: "%s/%s/%s" % (r.title, r.author, r.year)
result_index = set()
for connector in get_connectors():
result_set = None
if maybe_isbn:
# Search on ISBN
if not connector.isbn_search_url or connector.isbn_search_url == '':
if not connector.isbn_search_url or connector.isbn_search_url == "":
result_set = []
else:
try:
@ -42,32 +42,33 @@ def search(query, min_confidence=0.1):
except (HTTPError, ConnectorException):
continue
result_set = [r for r in result_set \
if dedup_slug(r) not in result_index]
result_set = [r for r in result_set if dedup_slug(r) not in result_index]
# `|=` concats two sets. WE ARE GETTING FANCY HERE
result_index |= set(dedup_slug(r) for r in result_set)
results.append({
'connector': connector,
'results': result_set,
})
results.append(
{
"connector": connector,
"results": result_set,
}
)
return results
def local_search(query, min_confidence=0.1, raw=False):
    """only look at local search results

    Loads the connector whose model row has ``local=True`` and forwards
    ``query``, ``min_confidence`` and ``raw`` to its ``search``.
    """
    connector = load_connector(models.Connector.objects.get(local=True))
    return connector.search(query, min_confidence=min_confidence, raw=raw)
def isbn_local_search(query, raw=False):
    """only look at local isbn search results

    Loads the connector whose model row has ``local=True`` and forwards
    ``query`` and ``raw`` to its ``isbn_search``.
    """
    connector = load_connector(models.Connector.objects.get(local=True))
    return connector.isbn_search(query, raw=raw)
def first_search_result(query, min_confidence=0.1):
''' search until you find a result that fits '''
""" search until you find a result that fits """
for connector in get_connectors():
result = connector.search(query, min_confidence=min_confidence)
if result:
@ -76,29 +77,29 @@ def first_search_result(query, min_confidence=0.1):
def get_connectors():
    """load all connectors

    Generator: yields one instantiated connector per Connector row,
    ordered by ``priority``.
    """
    for info in models.Connector.objects.order_by("priority").all():
        yield load_connector(info)
def get_or_create_connector(remote_id):
    """get the connector related to the author's server

    The network location of ``remote_id`` is used as the connector
    identifier. If no Connector row exists for it, one is created that
    points at that host using the ``bookwyrm_connector`` module.

    Raises ValueError when ``remote_id`` has no network location.
    """
    identifier = urlparse(remote_id).netloc
    if not identifier:
        raise ValueError("Invalid remote id")

    try:
        connector_info = models.Connector.objects.get(identifier=identifier)
    except models.Connector.DoesNotExist:
        connector_info = models.Connector.objects.create(
            identifier=identifier,
            connector_file="bookwyrm_connector",
            base_url="https://%s" % identifier,
            books_url="https://%s/book" % identifier,
            covers_url="https://%s/images/covers" % identifier,
            search_url="https://%s/search?q=" % identifier,
            priority=2,
        )

    return load_connector(connector_info)
@ -106,7 +107,7 @@ def get_or_create_connector(remote_id):
@app.task
def load_more_data(connector_id, book_id):
''' background the work of getting all 10,000 editions of LoTR '''
""" background the work of getting all 10,000 editions of LoTR """
connector_info = models.Connector.objects.get(id=connector_id)
connector = load_connector(connector_info)
book = models.Book.objects.select_subclasses().get(id=book_id)
@ -114,8 +115,8 @@ def load_more_data(connector_id, book_id):
def load_connector(connector_info):
    """instantiate the connector class

    Imports ``bookwyrm.connectors.<connector_file>`` and returns that
    module's ``Connector`` built from ``connector_info.identifier``.
    """
    connector = importlib.import_module(
        "bookwyrm.connectors.%s" % connector_info.connector_file
    )
    return connector.Connector(connector_info.identifier)

View File

@ -1,4 +1,4 @@
''' openlibrary data connector '''
""" openlibrary data connector """
import re
from bookwyrm import models
@ -9,148 +9,134 @@ from .openlibrary_languages import languages
class Connector(AbstractConnector):
''' instantiate a connector for OL '''
""" instantiate a connector for OL """
def __init__(self, identifier):
    """set up the openlibrary field mappings for books and authors"""
    super().__init__(identifier)

    # several fields arrive as lists of which only the first entry is kept
    get_first = lambda a: a[0]
    # "key" values are paths, made absolute by prefixing base_url
    get_remote_id = lambda a: self.base_url + a
    self.book_mappings = [
        Mapping("title"),
        Mapping("id", remote_field="key", formatter=get_remote_id),
        Mapping("cover", remote_field="covers", formatter=self.get_cover_url),
        Mapping("sortTitle", remote_field="sort_title"),
        Mapping("subtitle"),
        Mapping("description", formatter=get_description),
        Mapping("languages", formatter=get_languages),
        Mapping("series", formatter=get_first),
        Mapping("seriesNumber", remote_field="series_number"),
        Mapping("subjects"),
        Mapping("subjectPlaces", remote_field="subject_places"),
        Mapping("isbn13", remote_field="isbn_13", formatter=get_first),
        Mapping("isbn10", remote_field="isbn_10", formatter=get_first),
        Mapping("lccn", formatter=get_first),
        Mapping("oclcNumber", remote_field="oclc_numbers", formatter=get_first),
        Mapping(
            "openlibraryKey", remote_field="key", formatter=get_openlibrary_key
        ),
        Mapping("goodreadsKey", remote_field="goodreads_key"),
        Mapping("asin"),
        Mapping(
            "firstPublishedDate",
            remote_field="first_publish_date",
        ),
        Mapping("publishedDate", remote_field="publish_date"),
        Mapping("pages", remote_field="number_of_pages"),
        Mapping("physicalFormat", remote_field="physical_format"),
        Mapping("publishers"),
    ]

    self.author_mappings = [
        Mapping("id", remote_field="key", formatter=get_remote_id),
        Mapping("name"),
        Mapping(
            "openlibraryKey", remote_field="key", formatter=get_openlibrary_key
        ),
        Mapping("born", remote_field="birth_date"),
        Mapping("died", remote_field="death_date"),
        Mapping("bio", formatter=get_description),
    ]
def get_remote_id_from_data(self, data):
    """format a url from an openlibrary id field

    Returns ``books_url`` followed by ``data["key"]``. Raises
    ConnectorException when ``data`` has no "key" entry.
    """
    try:
        key = data["key"]
    except KeyError as err:
        # keep the original KeyError attached as the cause
        raise ConnectorException("Invalid book data") from err
    return "%s%s" % (self.books_url, key)
def is_work_data(self, data):
    """True when ``data["key"]`` ends in an ``OL<digits>W`` identifier"""
    return bool(re.match(r"^[\/\w]+OL\d+W$", data["key"]))
def get_edition_from_work_data(self, data):
    """fetch a work's editions and pick the default one

    Requests ``<books_url><key>/editions`` and passes its "entries" to
    ``pick_default_edition``. Raises ConnectorException when ``data`` has
    no "key" entry.
    """
    try:
        key = data["key"]
    except KeyError as err:
        # keep the original KeyError attached as the cause
        raise ConnectorException("Invalid book data") from err
    url = "%s%s/editions" % (self.books_url, key)
    data = get_data(url)
    return pick_default_edition(data["entries"])
def get_work_from_edition_data(self, data):
    """fetch the first work listed on an edition

    Requests ``<books_url><key>`` for the key of ``data["works"][0]``.
    Raises ConnectorException when the edition lists no work.
    """
    try:
        key = data["works"][0]["key"]
    except (IndexError, KeyError) as err:
        # keep the original lookup error attached as the cause
        raise ConnectorException("No work found for edition") from err
    url = "%s%s" % (self.books_url, key)
    return get_data(url)
def get_authors_from_data(self, data):
    """parse author json and load or create authors

    Generator: yields ``self.get_or_create_author(url)`` for every entry
    of ``data["authors"]``; yields nothing when that key is absent.
    """
    for author_blob in data.get("authors", []):
        # entries may wrap the author in an "author" key, or be the author itself
        author_blob = author_blob.get("author", author_blob)
        # this id is "/authors/OL1234567A"
        author_id = author_blob["key"]
        url = "%s%s" % (self.base_url, author_id)
        yield self.get_or_create_author(url)
def get_cover_url(self, cover_blob):
    """ask openlibrary for the cover

    Builds ``<covers_url>/b/id/<id>-L.jpg`` from the first entry of
    ``cover_blob``.
    """
    cover_id = cover_blob[0]
    image_name = "%s-L.jpg" % cover_id
    return "%s/b/id/%s" % (self.covers_url, image_name)
def parse_search_data(self, data):
    """return the "docs" list of a search response

    Falls back to an empty list when "docs" is missing, because the
    search caller slices the result and slicing None would raise.
    """
    return data.get("docs") or []
def format_search_result(self, search_result):
    """build a SearchResult from one openlibrary search doc

    Author names are joined with ", "; "Unknown" is used when the doc has
    no "author_name".
    """
    # build the remote id from the openlibrary key
    key = self.books_url + search_result["key"]
    author = search_result.get("author_name") or ["Unknown"]
    return SearchResult(
        title=search_result.get("title"),
        key=key,
        author=", ".join(author),
        connector=self,
        year=search_result.get("first_publish_year"),
    )
def parse_isbn_search_data(self, data):
    """return the values of the isbn search response dict as a list"""
    return list(data.values())
def format_isbn_search_result(self, search_result):
    """build a SearchResult from one openlibrary isbn search entry

    Author names are taken from each "authors" entry's "name" and joined
    with ", "; a single "Unknown" author is used when there are none.
    """
    # build the remote id from the openlibrary key
    key = self.books_url + search_result["key"]
    authors = search_result.get("authors") or [{"name": "Unknown"}]
    author_names = [author.get("name") for author in authors]
    return SearchResult(
        title=search_result.get("title"),
        key=key,
        author=", ".join(author_names),
        connector=self,
        year=search_result.get("publish_date"),
    )
def load_edition_data(self, olkey):
    """query openlibrary for editions of a work"""
    url = "%s/works/%s/editions" % (self.books_url, olkey)
    return get_data(url)
def expand_book_data(self, book):
work = book
# go from the edition to the work, if necessary
@ -164,7 +150,7 @@ class Connector(AbstractConnector):
# who knows, man
return
for edition_data in edition_options.get('entries'):
for edition_data in edition_options.get("entries"):
# does this edition have ANY interesting data?
if ignore_edition(edition_data):
continue
@ -172,62 +158,63 @@ class Connector(AbstractConnector):
def ignore_edition(edition_data):
    """don't load a million editions that have no metadata

    Returns False (keep the edition) when it has an isbn, an oclc number,
    a cover, or a language list that does not mention english; returns
    True (ignore it) otherwise.
    """
    # an isbn, we love to see it
    if edition_data.get("isbn_13") or edition_data.get("isbn_10"):
        return False
    # grudgingly, oclc can stay
    if edition_data.get("oclc_numbers"):
        return False
    # if it has a cover it can stay
    if edition_data.get("covers"):
        return False
    # keep non-english editions
    if edition_data.get("languages") and "languages/eng" not in str(
        edition_data.get("languages")
    ):
        return False
    return True
def get_description(description_blob):
    """descriptions can be a string or a dict

    A dict yields its "value" entry (None if absent); anything else is
    returned unchanged.
    """
    if isinstance(description_blob, dict):
        return description_blob.get("value")
    return description_blob
def get_openlibrary_key(key):
    """convert /books/OL27320736M into OL27320736M"""
    return key.split("/")[-1]
def get_languages(language_blob):
    """/language/eng -> English

    Looks up each entry's "key" in the ``languages`` table; entries with
    an unknown or missing key produce None in the returned list.
    """
    return [languages.get(lang.get("key", ""), None) for lang in language_blob]
def pick_default_edition(options):
    """favor physical copies with covers in english

    Narrows ``options`` through a series of preferences, in order: has a
    cover, is in english, has a known physical format, has an isbn_13,
    has an ocaid. A preference that would leave nothing is skipped.
    Returns the first remaining edition, or None for empty input.
    """
    if not options:
        return None
    if len(options) == 1:
        return options[0]

    formats = ["paperback", "hardcover", "mass market paperback"]
    preferences = [
        lambda e: e.get("covers"),
        lambda e: "/languages/eng" in str(e.get("languages")),
        lambda e: str(e.get("physical_format")).lower() in formats,
        lambda e: e.get("isbn_13"),
        lambda e: e.get("ocaid"),
    ]
    for preferred in preferences:
        # keep the previous candidates when nothing matches this preference
        options = [e for e in options if preferred(e)] or options
    return options[0]

View File

@ -1,467 +1,467 @@
''' key lookups for openlibrary languages '''
""" key lookups for openlibrary languages """
languages = {
'/languages/eng': 'English',
'/languages/fre': 'French',
'/languages/spa': 'Spanish',
'/languages/ger': 'German',
'/languages/rus': 'Russian',
'/languages/ita': 'Italian',
'/languages/chi': 'Chinese',
'/languages/jpn': 'Japanese',
'/languages/por': 'Portuguese',
'/languages/ara': 'Arabic',
'/languages/pol': 'Polish',
'/languages/heb': 'Hebrew',
'/languages/kor': 'Korean',
'/languages/dut': 'Dutch',
'/languages/ind': 'Indonesian',
'/languages/lat': 'Latin',
'/languages/und': 'Undetermined',
'/languages/cmn': 'Mandarin',
'/languages/hin': 'Hindi',
'/languages/swe': 'Swedish',
'/languages/dan': 'Danish',
'/languages/urd': 'Urdu',
'/languages/hun': 'Hungarian',
'/languages/cze': 'Czech',
'/languages/tur': 'Turkish',
'/languages/ukr': 'Ukrainian',
'/languages/gre': 'Greek',
'/languages/vie': 'Vietnamese',
'/languages/bul': 'Bulgarian',
'/languages/ben': 'Bengali',
'/languages/rum': 'Romanian',
'/languages/cat': 'Catalan',
'/languages/nor': 'Norwegian',
'/languages/tha': 'Thai',
'/languages/per': 'Persian',
'/languages/scr': 'Croatian',
'/languages/mul': 'Multiple languages',
'/languages/fin': 'Finnish',
'/languages/tam': 'Tamil',
'/languages/guj': 'Gujarati',
'/languages/mar': 'Marathi',
'/languages/scc': 'Serbian',
'/languages/pan': 'Panjabi',
'/languages/wel': 'Welsh',
'/languages/tel': 'Telugu',
'/languages/yid': 'Yiddish',
'/languages/kan': 'Kannada',
'/languages/slo': 'Slovak',
'/languages/san': 'Sanskrit',
'/languages/arm': 'Armenian',
'/languages/mal': 'Malayalam',
'/languages/may': 'Malay',
'/languages/bur': 'Burmese',
'/languages/slv': 'Slovenian',
'/languages/lit': 'Lithuanian',
'/languages/tib': 'Tibetan',
'/languages/lav': 'Latvian',
'/languages/est': 'Estonian',
'/languages/nep': 'Nepali',
'/languages/ori': 'Oriya',
'/languages/mon': 'Mongolian',
'/languages/alb': 'Albanian',
'/languages/iri': 'Irish',
'/languages/geo': 'Georgian',
'/languages/afr': 'Afrikaans',
'/languages/grc': 'Ancient Greek',
'/languages/mac': 'Macedonian',
'/languages/bel': 'Belarusian',
'/languages/ice': 'Icelandic',
'/languages/srp': 'Serbian',
'/languages/snh': 'Sinhalese',
'/languages/snd': 'Sindhi',
'/languages/ota': 'Turkish, Ottoman',
'/languages/kur': 'Kurdish',
'/languages/aze': 'Azerbaijani',
'/languages/pus': 'Pushto',
'/languages/amh': 'Amharic',
'/languages/gag': 'Galician',
'/languages/hrv': 'Croatian',
'/languages/sin': 'Sinhalese',
'/languages/asm': 'Assamese',
'/languages/uzb': 'Uzbek',
'/languages/gae': 'Scottish Gaelix',
'/languages/kaz': 'Kazakh',
'/languages/swa': 'Swahili',
'/languages/bos': 'Bosnian',
'/languages/glg': 'Galician ',
'/languages/baq': 'Basque',
'/languages/tgl': 'Tagalog',
'/languages/raj': 'Rajasthani',
'/languages/gle': 'Irish',
'/languages/lao': 'Lao',
'/languages/jav': 'Javanese',
'/languages/mai': 'Maithili',
'/languages/tgk': 'Tajik ',
'/languages/khm': 'Khmer',
'/languages/roh': 'Raeto-Romance',
'/languages/kok': 'Konkani ',
'/languages/sit': 'Sino-Tibetan (Other)',
'/languages/mol': 'Moldavian',
'/languages/kir': 'Kyrgyz',
'/languages/new': 'Newari',
'/languages/inc': 'Indic (Other)',
'/languages/frm': 'French, Middle (ca. 1300-1600)',
'/languages/esp': 'Esperanto',
'/languages/hau': 'Hausa',
'/languages/tag': 'Tagalog',
'/languages/tuk': 'Turkmen',
'/languages/enm': 'English, Middle (1100-1500)',
'/languages/map': 'Austronesian (Other)',
'/languages/pli': 'Pali',
'/languages/fro': 'French, Old (ca. 842-1300)',
'/languages/nic': 'Niger-Kordofanian (Other)',
'/languages/tir': 'Tigrinya',
'/languages/wen': 'Sorbian (Other)',
'/languages/bho': 'Bhojpuri',
'/languages/roa': 'Romance (Other)',
'/languages/tut': 'Altaic (Other)',
'/languages/bra': 'Braj',
'/languages/sun': 'Sundanese',
'/languages/fiu': 'Finno-Ugrian (Other)',
'/languages/far': 'Faroese',
'/languages/ban': 'Balinese',
'/languages/tar': 'Tatar',
'/languages/bak': 'Bashkir',
'/languages/tat': 'Tatar',
'/languages/chu': 'Church Slavic',
'/languages/dra': 'Dravidian (Other)',
'/languages/pra': 'Prakrit languages',
'/languages/paa': 'Papuan (Other)',
'/languages/doi': 'Dogri',
'/languages/lah': 'Lahndā',
'/languages/mni': 'Manipuri',
'/languages/yor': 'Yoruba',
'/languages/gmh': 'German, Middle High (ca. 1050-1500)',
'/languages/kas': 'Kashmiri',
'/languages/fri': 'Frisian',
'/languages/mla': 'Malagasy',
'/languages/egy': 'Egyptian',
'/languages/rom': 'Romani',
'/languages/syr': 'Syriac, Modern',
'/languages/cau': 'Caucasian (Other)',
'/languages/hbs': 'Serbo-Croatian',
'/languages/sai': 'South American Indian (Other)',
'/languages/pro': 'Provençal (to 1500)',
'/languages/cpf': 'Creoles and Pidgins, French-based (Other)',
'/languages/ang': 'English, Old (ca. 450-1100)',
'/languages/bal': 'Baluchi',
'/languages/gla': 'Scottish Gaelic',
'/languages/chv': 'Chuvash',
'/languages/kin': 'Kinyarwanda',
'/languages/zul': 'Zulu',
'/languages/sla': 'Slavic (Other)',
'/languages/som': 'Somali',
'/languages/mlt': 'Maltese',
'/languages/uig': 'Uighur',
'/languages/mlg': 'Malagasy',
'/languages/sho': 'Shona',
'/languages/lan': 'Occitan (post 1500)',
'/languages/bre': 'Breton',
'/languages/sco': 'Scots',
'/languages/sso': 'Sotho',
'/languages/myn': 'Mayan languages',
'/languages/xho': 'Xhosa',
'/languages/gem': 'Germanic (Other)',
'/languages/esk': 'Eskimo languages',
'/languages/akk': 'Akkadian',
'/languages/div': 'Maldivian',
'/languages/sah': 'Yakut',
'/languages/tsw': 'Tswana',
'/languages/nso': 'Northern Sotho',
'/languages/pap': 'Papiamento',
'/languages/bnt': 'Bantu (Other)',
'/languages/oss': 'Ossetic',
'/languages/cre': 'Cree',
'/languages/ibo': 'Igbo',
'/languages/fao': 'Faroese',
'/languages/nai': 'North American Indian (Other)',
'/languages/mag': 'Magahi',
'/languages/arc': 'Aramaic',
'/languages/epo': 'Esperanto',
'/languages/kha': 'Khasi',
'/languages/oji': 'Ojibwa',
'/languages/que': 'Quechua',
'/languages/lug': 'Ganda',
'/languages/mwr': 'Marwari',
'/languages/awa': 'Awadhi ',
'/languages/cor': 'Cornish',
'/languages/lad': 'Ladino',
'/languages/dzo': 'Dzongkha',
'/languages/cop': 'Coptic',
'/languages/nah': 'Nahuatl',
'/languages/cai': 'Central American Indian (Other)',
'/languages/phi': 'Philippine (Other)',
'/languages/moh': 'Mohawk',
'/languages/crp': 'Creoles and Pidgins (Other)',
'/languages/nya': 'Nyanja',
'/languages/wol': 'Wolof ',
'/languages/haw': 'Hawaiian',
'/languages/eth': 'Ethiopic',
'/languages/mis': 'Miscellaneous languages',
'/languages/mkh': 'Mon-Khmer (Other)',
'/languages/alg': 'Algonquian (Other)',
'/languages/nde': 'Ndebele (Zimbabwe)',
'/languages/ssa': 'Nilo-Saharan (Other)',
'/languages/chm': 'Mari',
'/languages/che': 'Chechen',
'/languages/gez': 'Ethiopic',
'/languages/ven': 'Venda',
'/languages/cam': 'Khmer',
'/languages/fur': 'Friulian',
'/languages/ful': 'Fula',
'/languages/gal': 'Oromo',
'/languages/jrb': 'Judeo-Arabic',
'/languages/bua': 'Buriat',
'/languages/ady': 'Adygei',
'/languages/bem': 'Bemba',
'/languages/kar': 'Karen languages',
'/languages/sna': 'Shona',
'/languages/twi': 'Twi',
'/languages/btk': 'Batak',
'/languages/kaa': 'Kara-Kalpak',
'/languages/kom': 'Komi',
'/languages/sot': 'Sotho',
'/languages/tso': 'Tsonga',
'/languages/cpe': 'Creoles and Pidgins, English-based (Other)',
'/languages/gua': 'Guarani',
'/languages/mao': 'Maori',
'/languages/mic': 'Micmac',
'/languages/swz': 'Swazi',
'/languages/taj': 'Tajik',
'/languages/smo': 'Samoan',
'/languages/ace': 'Achinese',
'/languages/afa': 'Afroasiatic (Other)',
'/languages/lap': 'Sami',
'/languages/min': 'Minangkabau',
'/languages/oci': 'Occitan (post 1500)',
'/languages/tsn': 'Tswana',
'/languages/pal': 'Pahlavi',
'/languages/sux': 'Sumerian',
'/languages/ewe': 'Ewe',
'/languages/him': 'Himachali',
'/languages/kaw': 'Kawi',
'/languages/lus': 'Lushai',
'/languages/ceb': 'Cebuano',
'/languages/chr': 'Cherokee',
'/languages/fil': 'Filipino',
'/languages/ndo': 'Ndonga',
'/languages/ilo': 'Iloko',
'/languages/kbd': 'Kabardian',
'/languages/orm': 'Oromo',
'/languages/dum': 'Dutch, Middle (ca. 1050-1350)',
'/languages/bam': 'Bambara',
'/languages/goh': 'Old High German',
'/languages/got': 'Gothic',
'/languages/kon': 'Kongo',
'/languages/mun': 'Munda (Other)',
'/languages/kru': 'Kurukh',
'/languages/pam': 'Pampanga',
'/languages/grn': 'Guarani',
'/languages/gaa': '',
'/languages/fry': 'Frisian',
'/languages/iba': 'Iban',
'/languages/mak': 'Makasar',
'/languages/kik': 'Kikuyu',
'/languages/cho': 'Choctaw',
'/languages/cpp': 'Creoles and Pidgins, Portuguese-based (Other)',
'/languages/dak': 'Dakota',
'/languages/udm': 'Udmurt ',
'/languages/hat': 'Haitian French Creole',
'/languages/mus': 'Creek',
'/languages/ber': 'Berber (Other)',
'/languages/hil': 'Hiligaynon',
'/languages/iro': 'Iroquoian (Other)',
'/languages/kua': 'Kuanyama',
'/languages/mno': 'Manobo languages',
'/languages/run': 'Rundi',
'/languages/sat': 'Santali',
'/languages/shn': 'Shan',
'/languages/tyv': 'Tuvinian',
'/languages/chg': 'Chagatai',
'/languages/syc': 'Syriac',
'/languages/ath': 'Athapascan (Other)',
'/languages/aym': 'Aymara',
'/languages/bug': 'Bugis',
'/languages/cel': 'Celtic (Other)',
'/languages/int': 'Interlingua (International Auxiliary Language Association)',
'/languages/xal': 'Oirat',
'/languages/ava': 'Avaric',
'/languages/son': 'Songhai',
'/languages/tah': 'Tahitian',
'/languages/tet': 'Tetum',
'/languages/ira': 'Iranian (Other)',
'/languages/kac': 'Kachin',
'/languages/nob': 'Norwegian (Bokmål)',
'/languages/vai': 'Vai',
'/languages/bik': 'Bikol',
'/languages/mos': 'Mooré',
'/languages/tig': 'Tigré',
'/languages/fat': 'Fanti',
'/languages/her': 'Herero',
'/languages/kal': 'Kalâtdlisut',
'/languages/mad': 'Madurese',
'/languages/yue': 'Cantonese',
'/languages/chn': 'Chinook jargon',
'/languages/hmn': 'Hmong',
'/languages/lin': 'Lingala',
'/languages/man': 'Mandingo',
'/languages/nds': 'Low German',
'/languages/bas': 'Basa',
'/languages/gay': 'Gayo',
'/languages/gsw': 'gsw',
'/languages/ine': 'Indo-European (Other)',
'/languages/kro': 'Kru (Other)',
'/languages/kum': 'Kumyk',
'/languages/tsi': 'Tsimshian',
'/languages/zap': 'Zapotec',
'/languages/ach': 'Acoli',
'/languages/ada': 'Adangme',
'/languages/aka': 'Akan',
'/languages/khi': 'Khoisan (Other)',
'/languages/srd': 'Sardinian',
'/languages/arn': 'Mapuche',
'/languages/dyu': 'Dyula',
'/languages/loz': 'Lozi',
'/languages/ltz': 'Luxembourgish',
'/languages/sag': 'Sango (Ubangi Creole)',
'/languages/lez': 'Lezgian',
'/languages/luo': 'Luo (Kenya and Tanzania)',
'/languages/ssw': 'Swazi ',
'/languages/krc': 'Karachay-Balkar',
'/languages/nyn': 'Nyankole',
'/languages/sal': 'Salishan languages',
'/languages/jpr': 'Judeo-Persian',
'/languages/pau': 'Palauan',
'/languages/smi': 'Sami',
'/languages/aar': 'Afar',
'/languages/abk': 'Abkhaz',
'/languages/gon': 'Gondi',
'/languages/nzi': 'Nzima',
'/languages/sam': 'Samaritan Aramaic',
'/languages/sao': 'Samoan',
'/languages/srr': 'Serer',
'/languages/apa': 'Apache languages',
'/languages/crh': 'Crimean Tatar',
'/languages/efi': 'Efik',
'/languages/iku': 'Inuktitut',
'/languages/nav': 'Navajo',
'/languages/pon': 'Ponape',
'/languages/tmh': 'Tamashek',
'/languages/aus': 'Australian languages',
'/languages/oto': 'Otomian languages',
'/languages/war': 'Waray',
'/languages/ypk': 'Yupik languages',
'/languages/ave': 'Avestan',
'/languages/cus': 'Cushitic (Other)',
'/languages/del': 'Delaware',
'/languages/fon': 'Fon',
'/languages/ina': 'Interlingua (International Auxiliary Language Association)',
'/languages/myv': 'Erzya',
'/languages/pag': 'Pangasinan',
'/languages/peo': 'Old Persian (ca. 600-400 B.C.)',
'/languages/vls': 'Flemish',
'/languages/bai': 'Bamileke languages',
'/languages/bla': 'Siksika',
'/languages/day': 'Dayak',
'/languages/men': 'Mende',
'/languages/tai': 'Tai',
'/languages/ton': 'Tongan',
'/languages/uga': 'Ugaritic',
'/languages/yao': 'Yao (Africa)',
'/languages/zza': 'Zaza',
'/languages/bin': 'Edo',
'/languages/frs': 'East Frisian',
'/languages/inh': 'Ingush',
'/languages/mah': 'Marshallese',
'/languages/sem': 'Semitic (Other)',
'/languages/art': 'Artificial (Other)',
'/languages/chy': 'Cheyenne',
'/languages/cmc': 'Chamic languages',
'/languages/dar': 'Dargwa',
'/languages/dua': 'Duala',
'/languages/elx': 'Elamite',
'/languages/fan': 'Fang',
'/languages/fij': 'Fijian',
'/languages/gil': 'Gilbertese',
'/languages/ijo': 'Ijo',
'/languages/kam': 'Kamba',
'/languages/nog': 'Nogai',
'/languages/non': 'Old Norse',
'/languages/tem': 'Temne',
'/languages/arg': 'Aragonese',
'/languages/arp': 'Arapaho',
'/languages/arw': 'Arawak',
'/languages/din': 'Dinka',
'/languages/grb': 'Grebo',
'/languages/kos': 'Kusaie',
'/languages/lub': 'Luba-Katanga',
'/languages/mnc': 'Manchu',
'/languages/nyo': 'Nyoro',
'/languages/rar': 'Rarotongan',
'/languages/sel': 'Selkup',
'/languages/tkl': 'Tokelauan',
'/languages/tog': 'Tonga (Nyasa)',
'/languages/tum': 'Tumbuka',
'/languages/alt': 'Altai',
'/languages/ase': 'American Sign Language',
'/languages/ast': 'Asturian',
'/languages/chk': 'Chuukese',
'/languages/cos': 'Corsican',
'/languages/ewo': 'Ewondo',
'/languages/gor': 'Gorontalo',
'/languages/hmo': 'Hiri Motu',
'/languages/lol': 'Mongo-Nkundu',
'/languages/lun': 'Lunda',
'/languages/mas': 'Masai',
'/languages/niu': 'Niuean',
'/languages/rup': 'Aromanian',
'/languages/sas': 'Sasak',
'/languages/sio': 'Siouan (Other)',
'/languages/sus': 'Susu',
'/languages/zun': 'Zuni',
'/languages/bat': 'Baltic (Other)',
'/languages/car': 'Carib',
'/languages/cha': 'Chamorro',
'/languages/kab': 'Kabyle',
'/languages/kau': 'Kanuri',
'/languages/kho': 'Khotanese',
'/languages/lua': 'Luba-Lulua',
'/languages/mdf': 'Moksha',
'/languages/nbl': 'Ndebele (South Africa)',
'/languages/umb': 'Umbundu',
'/languages/wak': 'Wakashan languages',
'/languages/wal': 'Wolayta',
'/languages/ale': 'Aleut',
'/languages/bis': 'Bislama',
'/languages/gba': 'Gbaya',
'/languages/glv': 'Manx',
'/languages/gul': 'Gullah',
'/languages/ipk': 'Inupiaq',
'/languages/krl': 'Karelian',
'/languages/lam': 'Lamba (Zambia and Congo)',
'/languages/sad': 'Sandawe',
'/languages/sid': 'Sidamo',
'/languages/snk': 'Soninke',
'/languages/srn': 'Sranan',
'/languages/suk': 'Sukuma',
'/languages/ter': 'Terena',
'/languages/tiv': 'Tiv',
'/languages/tli': 'Tlingit',
'/languages/tpi': 'Tok Pisin',
'/languages/tvl': 'Tuvaluan',
'/languages/yap': 'Yapese',
'/languages/eka': 'Ekajuk',
'/languages/hsb': 'Upper Sorbian',
'/languages/ido': 'Ido',
'/languages/kmb': 'Kimbundu',
'/languages/kpe': 'Kpelle',
'/languages/mwl': 'Mirandese',
'/languages/nno': 'Nynorsk',
'/languages/nub': 'Nubian languages',
'/languages/osa': 'Osage',
'/languages/sme': 'Northern Sami',
'/languages/znd': 'Zande languages',
"/languages/eng": "English",
"/languages/fre": "French",
"/languages/spa": "Spanish",
"/languages/ger": "German",
"/languages/rus": "Russian",
"/languages/ita": "Italian",
"/languages/chi": "Chinese",
"/languages/jpn": "Japanese",
"/languages/por": "Portuguese",
"/languages/ara": "Arabic",
"/languages/pol": "Polish",
"/languages/heb": "Hebrew",
"/languages/kor": "Korean",
"/languages/dut": "Dutch",
"/languages/ind": "Indonesian",
"/languages/lat": "Latin",
"/languages/und": "Undetermined",
"/languages/cmn": "Mandarin",
"/languages/hin": "Hindi",
"/languages/swe": "Swedish",
"/languages/dan": "Danish",
"/languages/urd": "Urdu",
"/languages/hun": "Hungarian",
"/languages/cze": "Czech",
"/languages/tur": "Turkish",
"/languages/ukr": "Ukrainian",
"/languages/gre": "Greek",
"/languages/vie": "Vietnamese",
"/languages/bul": "Bulgarian",
"/languages/ben": "Bengali",
"/languages/rum": "Romanian",
"/languages/cat": "Catalan",
"/languages/nor": "Norwegian",
"/languages/tha": "Thai",
"/languages/per": "Persian",
"/languages/scr": "Croatian",
"/languages/mul": "Multiple languages",
"/languages/fin": "Finnish",
"/languages/tam": "Tamil",
"/languages/guj": "Gujarati",
"/languages/mar": "Marathi",
"/languages/scc": "Serbian",
"/languages/pan": "Panjabi",
"/languages/wel": "Welsh",
"/languages/tel": "Telugu",
"/languages/yid": "Yiddish",
"/languages/kan": "Kannada",
"/languages/slo": "Slovak",
"/languages/san": "Sanskrit",
"/languages/arm": "Armenian",
"/languages/mal": "Malayalam",
"/languages/may": "Malay",
"/languages/bur": "Burmese",
"/languages/slv": "Slovenian",
"/languages/lit": "Lithuanian",
"/languages/tib": "Tibetan",
"/languages/lav": "Latvian",
"/languages/est": "Estonian",
"/languages/nep": "Nepali",
"/languages/ori": "Oriya",
"/languages/mon": "Mongolian",
"/languages/alb": "Albanian",
"/languages/iri": "Irish",
"/languages/geo": "Georgian",
"/languages/afr": "Afrikaans",
"/languages/grc": "Ancient Greek",
"/languages/mac": "Macedonian",
"/languages/bel": "Belarusian",
"/languages/ice": "Icelandic",
"/languages/srp": "Serbian",
"/languages/snh": "Sinhalese",
"/languages/snd": "Sindhi",
"/languages/ota": "Turkish, Ottoman",
"/languages/kur": "Kurdish",
"/languages/aze": "Azerbaijani",
"/languages/pus": "Pushto",
"/languages/amh": "Amharic",
"/languages/gag": "Galician",
"/languages/hrv": "Croatian",
"/languages/sin": "Sinhalese",
"/languages/asm": "Assamese",
"/languages/uzb": "Uzbek",
"/languages/gae": "Scottish Gaelix",
"/languages/kaz": "Kazakh",
"/languages/swa": "Swahili",
"/languages/bos": "Bosnian",
"/languages/glg": "Galician ",
"/languages/baq": "Basque",
"/languages/tgl": "Tagalog",
"/languages/raj": "Rajasthani",
"/languages/gle": "Irish",
"/languages/lao": "Lao",
"/languages/jav": "Javanese",
"/languages/mai": "Maithili",
"/languages/tgk": "Tajik ",
"/languages/khm": "Khmer",
"/languages/roh": "Raeto-Romance",
"/languages/kok": "Konkani ",
"/languages/sit": "Sino-Tibetan (Other)",
"/languages/mol": "Moldavian",
"/languages/kir": "Kyrgyz",
"/languages/new": "Newari",
"/languages/inc": "Indic (Other)",
"/languages/frm": "French, Middle (ca. 1300-1600)",
"/languages/esp": "Esperanto",
"/languages/hau": "Hausa",
"/languages/tag": "Tagalog",
"/languages/tuk": "Turkmen",
"/languages/enm": "English, Middle (1100-1500)",
"/languages/map": "Austronesian (Other)",
"/languages/pli": "Pali",
"/languages/fro": "French, Old (ca. 842-1300)",
"/languages/nic": "Niger-Kordofanian (Other)",
"/languages/tir": "Tigrinya",
"/languages/wen": "Sorbian (Other)",
"/languages/bho": "Bhojpuri",
"/languages/roa": "Romance (Other)",
"/languages/tut": "Altaic (Other)",
"/languages/bra": "Braj",
"/languages/sun": "Sundanese",
"/languages/fiu": "Finno-Ugrian (Other)",
"/languages/far": "Faroese",
"/languages/ban": "Balinese",
"/languages/tar": "Tatar",
"/languages/bak": "Bashkir",
"/languages/tat": "Tatar",
"/languages/chu": "Church Slavic",
"/languages/dra": "Dravidian (Other)",
"/languages/pra": "Prakrit languages",
"/languages/paa": "Papuan (Other)",
"/languages/doi": "Dogri",
"/languages/lah": "Lahndā",
"/languages/mni": "Manipuri",
"/languages/yor": "Yoruba",
"/languages/gmh": "German, Middle High (ca. 1050-1500)",
"/languages/kas": "Kashmiri",
"/languages/fri": "Frisian",
"/languages/mla": "Malagasy",
"/languages/egy": "Egyptian",
"/languages/rom": "Romani",
"/languages/syr": "Syriac, Modern",
"/languages/cau": "Caucasian (Other)",
"/languages/hbs": "Serbo-Croatian",
"/languages/sai": "South American Indian (Other)",
"/languages/pro": "Provençal (to 1500)",
"/languages/cpf": "Creoles and Pidgins, French-based (Other)",
"/languages/ang": "English, Old (ca. 450-1100)",
"/languages/bal": "Baluchi",
"/languages/gla": "Scottish Gaelic",
"/languages/chv": "Chuvash",
"/languages/kin": "Kinyarwanda",
"/languages/zul": "Zulu",
"/languages/sla": "Slavic (Other)",
"/languages/som": "Somali",
"/languages/mlt": "Maltese",
"/languages/uig": "Uighur",
"/languages/mlg": "Malagasy",
"/languages/sho": "Shona",
"/languages/lan": "Occitan (post 1500)",
"/languages/bre": "Breton",
"/languages/sco": "Scots",
"/languages/sso": "Sotho",
"/languages/myn": "Mayan languages",
"/languages/xho": "Xhosa",
"/languages/gem": "Germanic (Other)",
"/languages/esk": "Eskimo languages",
"/languages/akk": "Akkadian",
"/languages/div": "Maldivian",
"/languages/sah": "Yakut",
"/languages/tsw": "Tswana",
"/languages/nso": "Northern Sotho",
"/languages/pap": "Papiamento",
"/languages/bnt": "Bantu (Other)",
"/languages/oss": "Ossetic",
"/languages/cre": "Cree",
"/languages/ibo": "Igbo",
"/languages/fao": "Faroese",
"/languages/nai": "North American Indian (Other)",
"/languages/mag": "Magahi",
"/languages/arc": "Aramaic",
"/languages/epo": "Esperanto",
"/languages/kha": "Khasi",
"/languages/oji": "Ojibwa",
"/languages/que": "Quechua",
"/languages/lug": "Ganda",
"/languages/mwr": "Marwari",
"/languages/awa": "Awadhi ",
"/languages/cor": "Cornish",
"/languages/lad": "Ladino",
"/languages/dzo": "Dzongkha",
"/languages/cop": "Coptic",
"/languages/nah": "Nahuatl",
"/languages/cai": "Central American Indian (Other)",
"/languages/phi": "Philippine (Other)",
"/languages/moh": "Mohawk",
"/languages/crp": "Creoles and Pidgins (Other)",
"/languages/nya": "Nyanja",
"/languages/wol": "Wolof ",
"/languages/haw": "Hawaiian",
"/languages/eth": "Ethiopic",
"/languages/mis": "Miscellaneous languages",
"/languages/mkh": "Mon-Khmer (Other)",
"/languages/alg": "Algonquian (Other)",
"/languages/nde": "Ndebele (Zimbabwe)",
"/languages/ssa": "Nilo-Saharan (Other)",
"/languages/chm": "Mari",
"/languages/che": "Chechen",
"/languages/gez": "Ethiopic",
"/languages/ven": "Venda",
"/languages/cam": "Khmer",
"/languages/fur": "Friulian",
"/languages/ful": "Fula",
"/languages/gal": "Oromo",
"/languages/jrb": "Judeo-Arabic",
"/languages/bua": "Buriat",
"/languages/ady": "Adygei",
"/languages/bem": "Bemba",
"/languages/kar": "Karen languages",
"/languages/sna": "Shona",
"/languages/twi": "Twi",
"/languages/btk": "Batak",
"/languages/kaa": "Kara-Kalpak",
"/languages/kom": "Komi",
"/languages/sot": "Sotho",
"/languages/tso": "Tsonga",
"/languages/cpe": "Creoles and Pidgins, English-based (Other)",
"/languages/gua": "Guarani",
"/languages/mao": "Maori",
"/languages/mic": "Micmac",
"/languages/swz": "Swazi",
"/languages/taj": "Tajik",
"/languages/smo": "Samoan",
"/languages/ace": "Achinese",
"/languages/afa": "Afroasiatic (Other)",
"/languages/lap": "Sami",
"/languages/min": "Minangkabau",
"/languages/oci": "Occitan (post 1500)",
"/languages/tsn": "Tswana",
"/languages/pal": "Pahlavi",
"/languages/sux": "Sumerian",
"/languages/ewe": "Ewe",
"/languages/him": "Himachali",
"/languages/kaw": "Kawi",
"/languages/lus": "Lushai",
"/languages/ceb": "Cebuano",
"/languages/chr": "Cherokee",
"/languages/fil": "Filipino",
"/languages/ndo": "Ndonga",
"/languages/ilo": "Iloko",
"/languages/kbd": "Kabardian",
"/languages/orm": "Oromo",
"/languages/dum": "Dutch, Middle (ca. 1050-1350)",
"/languages/bam": "Bambara",
"/languages/goh": "Old High German",
"/languages/got": "Gothic",
"/languages/kon": "Kongo",
"/languages/mun": "Munda (Other)",
"/languages/kru": "Kurukh",
"/languages/pam": "Pampanga",
"/languages/grn": "Guarani",
"/languages/gaa": "",
"/languages/fry": "Frisian",
"/languages/iba": "Iban",
"/languages/mak": "Makasar",
"/languages/kik": "Kikuyu",
"/languages/cho": "Choctaw",
"/languages/cpp": "Creoles and Pidgins, Portuguese-based (Other)",
"/languages/dak": "Dakota",
"/languages/udm": "Udmurt ",
"/languages/hat": "Haitian French Creole",
"/languages/mus": "Creek",
"/languages/ber": "Berber (Other)",
"/languages/hil": "Hiligaynon",
"/languages/iro": "Iroquoian (Other)",
"/languages/kua": "Kuanyama",
"/languages/mno": "Manobo languages",
"/languages/run": "Rundi",
"/languages/sat": "Santali",
"/languages/shn": "Shan",
"/languages/tyv": "Tuvinian",
"/languages/chg": "Chagatai",
"/languages/syc": "Syriac",
"/languages/ath": "Athapascan (Other)",
"/languages/aym": "Aymara",
"/languages/bug": "Bugis",
"/languages/cel": "Celtic (Other)",
"/languages/int": "Interlingua (International Auxiliary Language Association)",
"/languages/xal": "Oirat",
"/languages/ava": "Avaric",
"/languages/son": "Songhai",
"/languages/tah": "Tahitian",
"/languages/tet": "Tetum",
"/languages/ira": "Iranian (Other)",
"/languages/kac": "Kachin",
"/languages/nob": "Norwegian (Bokmål)",
"/languages/vai": "Vai",
"/languages/bik": "Bikol",
"/languages/mos": "Mooré",
"/languages/tig": "Tigré",
"/languages/fat": "Fanti",
"/languages/her": "Herero",
"/languages/kal": "Kalâtdlisut",
"/languages/mad": "Madurese",
"/languages/yue": "Cantonese",
"/languages/chn": "Chinook jargon",
"/languages/hmn": "Hmong",
"/languages/lin": "Lingala",
"/languages/man": "Mandingo",
"/languages/nds": "Low German",
"/languages/bas": "Basa",
"/languages/gay": "Gayo",
"/languages/gsw": "gsw",
"/languages/ine": "Indo-European (Other)",
"/languages/kro": "Kru (Other)",
"/languages/kum": "Kumyk",
"/languages/tsi": "Tsimshian",
"/languages/zap": "Zapotec",
"/languages/ach": "Acoli",
"/languages/ada": "Adangme",
"/languages/aka": "Akan",
"/languages/khi": "Khoisan (Other)",
"/languages/srd": "Sardinian",
"/languages/arn": "Mapuche",
"/languages/dyu": "Dyula",
"/languages/loz": "Lozi",
"/languages/ltz": "Luxembourgish",
"/languages/sag": "Sango (Ubangi Creole)",
"/languages/lez": "Lezgian",
"/languages/luo": "Luo (Kenya and Tanzania)",
"/languages/ssw": "Swazi ",
"/languages/krc": "Karachay-Balkar",
"/languages/nyn": "Nyankole",
"/languages/sal": "Salishan languages",
"/languages/jpr": "Judeo-Persian",
"/languages/pau": "Palauan",
"/languages/smi": "Sami",
"/languages/aar": "Afar",
"/languages/abk": "Abkhaz",
"/languages/gon": "Gondi",
"/languages/nzi": "Nzima",
"/languages/sam": "Samaritan Aramaic",
"/languages/sao": "Samoan",
"/languages/srr": "Serer",
"/languages/apa": "Apache languages",
"/languages/crh": "Crimean Tatar",
"/languages/efi": "Efik",
"/languages/iku": "Inuktitut",
"/languages/nav": "Navajo",
"/languages/pon": "Ponape",
"/languages/tmh": "Tamashek",
"/languages/aus": "Australian languages",
"/languages/oto": "Otomian languages",
"/languages/war": "Waray",
"/languages/ypk": "Yupik languages",
"/languages/ave": "Avestan",
"/languages/cus": "Cushitic (Other)",
"/languages/del": "Delaware",
"/languages/fon": "Fon",
"/languages/ina": "Interlingua (International Auxiliary Language Association)",
"/languages/myv": "Erzya",
"/languages/pag": "Pangasinan",
"/languages/peo": "Old Persian (ca. 600-400 B.C.)",
"/languages/vls": "Flemish",
"/languages/bai": "Bamileke languages",
"/languages/bla": "Siksika",
"/languages/day": "Dayak",
"/languages/men": "Mende",
"/languages/tai": "Tai",
"/languages/ton": "Tongan",
"/languages/uga": "Ugaritic",
"/languages/yao": "Yao (Africa)",
"/languages/zza": "Zaza",
"/languages/bin": "Edo",
"/languages/frs": "East Frisian",
"/languages/inh": "Ingush",
"/languages/mah": "Marshallese",
"/languages/sem": "Semitic (Other)",
"/languages/art": "Artificial (Other)",
"/languages/chy": "Cheyenne",
"/languages/cmc": "Chamic languages",
"/languages/dar": "Dargwa",
"/languages/dua": "Duala",
"/languages/elx": "Elamite",
"/languages/fan": "Fang",
"/languages/fij": "Fijian",
"/languages/gil": "Gilbertese",
"/languages/ijo": "Ijo",
"/languages/kam": "Kamba",
"/languages/nog": "Nogai",
"/languages/non": "Old Norse",
"/languages/tem": "Temne",
"/languages/arg": "Aragonese",
"/languages/arp": "Arapaho",
"/languages/arw": "Arawak",
"/languages/din": "Dinka",
"/languages/grb": "Grebo",
"/languages/kos": "Kusaie",
"/languages/lub": "Luba-Katanga",
"/languages/mnc": "Manchu",
"/languages/nyo": "Nyoro",
"/languages/rar": "Rarotongan",
"/languages/sel": "Selkup",
"/languages/tkl": "Tokelauan",
"/languages/tog": "Tonga (Nyasa)",
"/languages/tum": "Tumbuka",
"/languages/alt": "Altai",
"/languages/ase": "American Sign Language",
"/languages/ast": "Asturian",
"/languages/chk": "Chuukese",
"/languages/cos": "Corsican",
"/languages/ewo": "Ewondo",
"/languages/gor": "Gorontalo",
"/languages/hmo": "Hiri Motu",
"/languages/lol": "Mongo-Nkundu",
"/languages/lun": "Lunda",
"/languages/mas": "Masai",
"/languages/niu": "Niuean",
"/languages/rup": "Aromanian",
"/languages/sas": "Sasak",
"/languages/sio": "Siouan (Other)",
"/languages/sus": "Susu",
"/languages/zun": "Zuni",
"/languages/bat": "Baltic (Other)",
"/languages/car": "Carib",
"/languages/cha": "Chamorro",
"/languages/kab": "Kabyle",
"/languages/kau": "Kanuri",
"/languages/kho": "Khotanese",
"/languages/lua": "Luba-Lulua",
"/languages/mdf": "Moksha",
"/languages/nbl": "Ndebele (South Africa)",
"/languages/umb": "Umbundu",
"/languages/wak": "Wakashan languages",
"/languages/wal": "Wolayta",
"/languages/ale": "Aleut",
"/languages/bis": "Bislama",
"/languages/gba": "Gbaya",
"/languages/glv": "Manx",
"/languages/gul": "Gullah",
"/languages/ipk": "Inupiaq",
"/languages/krl": "Karelian",
"/languages/lam": "Lamba (Zambia and Congo)",
"/languages/sad": "Sandawe",
"/languages/sid": "Sidamo",
"/languages/snk": "Soninke",
"/languages/srn": "Sranan",
"/languages/suk": "Sukuma",
"/languages/ter": "Terena",
"/languages/tiv": "Tiv",
"/languages/tli": "Tlingit",
"/languages/tpi": "Tok Pisin",
"/languages/tvl": "Tuvaluan",
"/languages/yap": "Yapese",
"/languages/eka": "Ekajuk",
"/languages/hsb": "Upper Sorbian",
"/languages/ido": "Ido",
"/languages/kmb": "Kimbundu",
"/languages/kpe": "Kpelle",
"/languages/mwl": "Mirandese",
"/languages/nno": "Nynorsk",
"/languages/nub": "Nubian languages",
"/languages/osa": "Osage",
"/languages/sme": "Northern Sami",
"/languages/znd": "Zande languages",
}

View File

@ -1,4 +1,4 @@
''' using a bookwyrm instance as a source of book data '''
""" using a bookwyrm instance as a source of book data """
from functools import reduce
import operator
@ -10,10 +10,11 @@ from .abstract_connector import AbstractConnector, SearchResult
class Connector(AbstractConnector):
''' instantiate a connector '''
""" instantiate a connector """
# pylint: disable=arguments-differ
def search(self, query, min_confidence=0.1, raw=False):
''' search your local database '''
""" search your local database """
if not query:
return []
# first, try searching unique identifiers
@ -34,19 +35,18 @@ class Connector(AbstractConnector):
return search_results
def isbn_search(self, query, raw=False):
''' search your local database '''
""" search your local database """
if not query:
return []
filters = [{f: query} for f in ['isbn_10', 'isbn_13']]
filters = [{f: query} for f in ["isbn_10", "isbn_13"]]
results = models.Edition.objects.filter(
reduce(operator.or_, (Q(**f) for f in filters))
).distinct()
# when there are multiple editions of the same work, pick the default.
# it would be odd for this to happen.
results = results.filter(parent_work__default_edition__id=F('id')) \
or results
results = results.filter(parent_work__default_edition__id=F("id")) or results
search_results = []
for result in results:
@ -58,33 +58,30 @@ class Connector(AbstractConnector):
break
return search_results
def format_search_result(self, search_result):
    """build a SearchResult from a local search hit

    reads ``title``, ``remote_id``, ``author_text`` and ``published_date``
    from ``search_result``; ``rank`` is optional
    """
    published = search_result.published_date
    return SearchResult(
        title=search_result.title,
        key=search_result.remote_id,
        author=search_result.author_text,
        # no publication date means there is no year to report
        year=published.year if published else None,
        connector=self,
        # hits without a ``rank`` attribute get a confidence of 1
        confidence=getattr(search_result, "rank", 1),
    )
def format_isbn_search_result(self, search_result):
    """build a SearchResult from a local isbn search hit

    reads ``title``, ``remote_id``, ``author_text`` and ``published_date``
    from ``search_result``; ``rank`` is optional
    """
    published = search_result.published_date
    return SearchResult(
        title=search_result.title,
        key=search_result.remote_id,
        author=search_result.author_text,
        # no publication date means there is no year to report
        year=published.year if published else None,
        connector=self,
        # hits without a ``rank`` attribute get a confidence of 1
        confidence=getattr(search_result, "rank", 1),
    )
def is_work_data(self, data):
pass
@ -98,11 +95,11 @@ class Connector(AbstractConnector):
return None
def parse_isbn_search_data(self, data):
    """it's already in the right format, don't even worry about it

    the isbn search data is handed back as-is: the very same object,
    neither copied nor modified
    """
    return data
def parse_search_data(self, data):
    """it's already in the right format, don't even worry about it

    the search data is handed back as-is: the very same object,
    neither copied nor modified
    """
    return data
def expand_book_data(self, book):
@ -110,44 +107,47 @@ class Connector(AbstractConnector):
def search_identifiers(query):
    """tries remote_id, isbn; defined as dedupe fields on the model

    builds one exact-match lookup per Edition field that carries a truthy
    ``deduplication_field`` attribute and ORs them together. returns the
    matches narrowed to editions that are their work's default edition when
    that narrowing finds anything, otherwise every match.
    """
    lookups = [
        Q(**{field.name: query})
        for field in models.Edition._meta.get_fields()
        if getattr(field, "deduplication_field", False)
    ]
    if not lookups:
        # reduce() without an initial value raises TypeError on an empty
        # sequence; with no fields to match against there are no results
        return models.Edition.objects.none()

    results = models.Edition.objects.filter(reduce(operator.or_, lookups)).distinct()

    # when there are multiple editions of the same work, pick the default.
    # it would be odd for this to happen.
    # (an empty queryset is falsy, so ``or`` falls back to every match)
    return results.filter(parent_work__default_edition__id=F("id")) or results
def search_title_author(query, min_confidence):
''' searches for title and author '''
vector = SearchVector('title', weight='A') +\
SearchVector('subtitle', weight='B') +\
SearchVector('authors__name', weight='C') +\
SearchVector('series', weight='D')
""" searches for title and author """
vector = (
SearchVector("title", weight="A")
+ SearchVector("subtitle", weight="B")
+ SearchVector("authors__name", weight="C")
+ SearchVector("series", weight="D")
)
results = models.Edition.objects.annotate(
search=vector
).annotate(
rank=SearchRank(vector, query)
).filter(
rank__gt=min_confidence
).order_by('-rank')
results = (
models.Edition.objects.annotate(search=vector)
.annotate(rank=SearchRank(vector, query))
.filter(rank__gt=min_confidence)
.order_by("-rank")
)
# when there are multiple editions of the same work, pick the closest
editions_of_work = results.values(
'parent_work'
).annotate(
Count('parent_work')
).values_list('parent_work')
editions_of_work = (
results.values("parent_work")
.annotate(Count("parent_work"))
.values_list("parent_work")
)
for work_id in set(editions_of_work):
editions = results.filter(parent_work=work_id)
default = editions.filter(parent_work__default_edition=F('id'))
default = editions.filter(parent_work__default_edition=F("id"))
default_rank = default.first().rank if default.exists() else 0
# if multiple books have the top rank, pick the default edition
if default_rank == editions.first().rank:

View File

@ -1,3 +1,3 @@
""" settings for the book data connectors """
# connector names, kept in their original order
CONNECTORS = [
    "openlibrary",
    "self_connector",
    "bookwyrm_connector",
]