def raise_not_valid_url(url):
    """Do some basic reality checks on the url before it is requested.

    A url is accepted only when its scheme is ``http`` or ``https`` and its
    host is not a literal IP address.

    Args:
        url: the url that is about to be fetched.

    Raises:
        ConnectorException: if the scheme is not http(s), or if the host part
            of the url is an IPv4 or IPv6 address.
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        raise ConnectorException(f"Invalid scheme: {url}")

    # Use ``hostname`` rather than ``netloc``: netloc still contains any
    # ``user:password@`` prefix, ``:port`` suffix and the square brackets of
    # an IPv6 literal, all of which make ``ip_address`` reject the string and
    # would let urls such as ``http://127.0.0.1:8000/`` slip through.
    # ``hostname`` is None when the url has no host at all.
    host = parsed.hostname or ""
    try:
        ipaddress.ip_address(host)
    except ValueError:
        # it's not an IP address, which is good
        return

    # Raised outside the ``try`` so the rejection can never be swallowed by
    # the ``except ValueError`` clause above.
    raise ConnectorException(f"Provided url is an IP address: {url}")
def test_get_data_invalid_url(self):
    """get_data refuses a non-http(s) scheme and a bare IP address host"""
    rejected_urls = (
        # scheme other than http/https
        "file://hello.com/image/jpg",
        # host is a literal IP address
        "http://127.0.0.1/image/jpg",
    )
    for bad_url in rejected_urls:
        with self.assertRaises(ConnectorException):
            get_data(bad_url)