2021-10-29 06:14:32 -04:00
|
|
|
"""ISNI author checking utilities"""
|
2021-10-29 01:12:31 -04:00
|
|
|
import xml.etree.ElementTree as ET
from urllib.parse import quote_plus

import requests
|
2021-10-29 01:12:31 -04:00
|
|
|
|
|
|
|
# Pieces of the ISNI SRU search URL. A request URL is built as
# BASE_STRING + <encoded search term> + SUFFIX_STRING.

# Endpoint plus the start of the query: pica.na = "
# ("%3D" is an encoded "=", "%22" an encoded double quote)
BASE_STRING = "http://isni.oclc.org/sru/" "?query=pica.na+%3D+%22"
#pylint: disable=line-too-long
# Closing double quote for the search term, followed by the fixed
# request parameters (adjacent literals are joined into one string)
SUFFIX_STRING = (
    "%22"
    "&version=1.1"
    "&operation=searchRetrieve"
    "&recordSchema=isni-b"
    "&maximumRecords=10"
    "&startRecord=1"
    "&recordPacking=xml"
    "&sortKeys=RLV%2Cpica%2C0%2C%2C"
)
|
2021-10-29 01:12:31 -04:00
|
|
|
|
|
|
|
|
|
|
|
def url_stringify(string):
    """Encode a search string for use inside a URL query value

    Spaces are still turned into "+". In addition, every character that
    is not safe inside a query string (for example "&", "#", "%", "+",
    double quotes and non-ASCII letters) is percent-encoded as UTF-8, so
    a name containing such characters can no longer cut the query short
    or change its meaning.

    Returns the encoded string; an empty input gives an empty string.
    """
    return quote_plus(string)
|
|
|
|
|
|
|
|
|
|
|
|
def _author_from_record(element):
    """Build an author dict from one ISNI responseRecord element

    Returns a dict with "uri" and, when the record provides them,
    "name" and "description". Returns None when the record has no
    isniURI element, since there is then nothing to identify it by.
    """
    uri = element.find(".//isniURI")
    if uri is None:
        return None
    author = {"uri": uri.text}

    # NOTE: this will often be incorrect, many naming systems
    # list "surname" before personal name
    personal_name = element.find(".//forename/..")
    # compare with None explicitly: the truth value of an Element
    # depends on its number of children, and testing it is deprecated
    if personal_name is not None:
        name_parts = []
        for path in (".//forename", ".//surname"):
            part = personal_name.find(path)
            # a missing element or an element without text is skipped
            # instead of raising
            if part is not None and part.text:
                name_parts.append(part.text)
        if name_parts:
            author["name"] = " ".join(name_parts)

    description = element.find(".//nameTitle")
    if description is not None:
        author["description"] = description.text

    return author


def find_authors_by_name(names, timeout=15):
    """Query the ISNI database for an author

    names: the name to search for; it is encoded with url_stringify
        and inserted between BASE_STRING and SUFFIX_STRING.
    timeout: seconds to wait for the ISNI server before requests
        raises a timeout error, rather than blocking indefinitely.

    Returns a list of dicts, one per responseRecord that has an
    isniURI. Each dict has a "uri" key, plus "name" and "description"
    when the record contains them.

    Raises whatever requests.get raises on connection problems, and
    xml.etree.ElementTree.ParseError if the response is not valid XML.
    """
    query = BASE_STRING + url_stringify(names) + SUFFIX_STRING
    result = requests.get(query, timeout=timeout)
    # the OCLC ISNI server asserts the payload is encoded
    # in latin1, but we know better
    result.encoding = "utf-8"

    # NOTE(review): the standard library XML parser is documented as
    # not secure against maliciously constructed data; this assumes
    # the ISNI server response can be trusted
    root = ET.fromstring(result.text)

    # build list of possible authors, dropping unusable records
    records = (
        _author_from_record(element)
        for element in root.iter("responseRecord")
    )
    return [author for author in records if author is not None]
|