Merge branch 'main' into openlibrary-author-fields

This commit is contained in:
Mouse Reeve
2022-02-16 18:06:04 -08:00
90 changed files with 1841 additions and 2557 deletions

View File

@ -1,7 +1,11 @@
""" functionality outline for a book data connector """
from abc import ABC, abstractmethod
import imghdr
import ipaddress
import logging
from urllib.parse import urlparse
from django.core.files.base import ContentFile
from django.db import transaction
import requests
from requests.exceptions import RequestException
@ -248,6 +252,8 @@ def dict_from_mappings(data, mappings):
def get_data(url, params=None, timeout=10):
"""wrapper for request.get"""
# check if the url is blocked
raise_not_valid_url(url)
if models.FederatedServer.is_blocked(url):
raise ConnectorException(f"Attempting to load data from blocked url: {url}")
@ -280,6 +286,7 @@ def get_data(url, params=None, timeout=10):
def get_image(url, timeout=10):
"""wrapper for requesting an image"""
raise_not_valid_url(url)
try:
resp = requests.get(
url,
@ -290,10 +297,32 @@ def get_image(url, timeout=10):
)
except RequestException as err:
logger.exception(err)
return None
return None, None
if not resp.ok:
return None
return resp
return None, None
image_content = ContentFile(resp.content)
extension = imghdr.what(None, image_content.read())
if not extension:
logger.exception("File requested was not an image: %s", url)
return None, None
return image_content, extension
def raise_not_valid_url(url):
"""do some basic reality checks on the url"""
parsed = urlparse(url)
if not parsed.scheme in ["http", "https"]:
raise ConnectorException("Invalid scheme: ", url)
try:
ipaddress.ip_address(parsed.netloc)
raise ConnectorException("Provided url is an IP address: ", url)
except ValueError:
# it's not an IP address, which is good
pass
class Mapping: