From 63f9e83df675afb131fa4c3776028b7912e1e599 Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Mon, 16 Dec 2024 15:30:22 +0100 Subject: [PATCH] feat(utils): let the rdf parser return lists instead of strings ... and adapt rdfimport definitions as well as tests accordingly --- .../rdfimport/E21_PersonFromDNB.toml | 26 ++++++++++--- .../rdfimport/E74_GroupFromDNB.toml | 5 ++- apis_core/utils/rdf.py | 5 ++- apis_core/utils/test_rdf.py | 39 ++++++++++++------- 4 files changed, 54 insertions(+), 21 deletions(-) diff --git a/apis_core/apis_entities/rdfimport/E21_PersonFromDNB.toml b/apis_core/apis_entities/rdfimport/E21_PersonFromDNB.toml index 6bc16f897..8512dfdf5 100644 --- a/apis_core/apis_entities/rdfimport/E21_PersonFromDNB.toml +++ b/apis_core/apis_entities/rdfimport/E21_PersonFromDNB.toml @@ -9,27 +9,43 @@ sparql = """ PREFIX gndo: SELECT ?forename WHERE { - ?subject gndo:forename ?default_forename . - OPTIONAL { - ?subject2 gndo:preferredNameEntityForThePerson ?med . + OPTIONAL { + ?subject gndo:preferredNameEntityForThePerson ?med . ?med gndo:forename ?preferred_forename . } + OPTIONAL { + ?subject2 gndo:forename ?default_forename . + } BIND(COALESCE(?preferred_forename, ?default_forename) AS ?forename) } +GROUP BY ?subject """ +[[attributes]] +# alternative_names +sparql = """ +PREFIX gndo: +SELECT ?alternative_names +WHERE { + ?subject gndo:variantNameForThePerson ?alternative_names +} +""" + [[attributes]] # surname sparql = """ PREFIX gndo: SELECT ?surname WHERE { - ?subject gndo:surname ?default_surname . OPTIONAL { - ?subject2 gndo:preferredNameEntityForThePerson ?med . + ?subject gndo:preferredNameEntityForThePerson ?med . ?med gndo:surname ?preferred_surname . } + OPTIONAL { + ?subject2 gndo:surname ?default_surname . + } BIND(COALESCE(?preferred_surname, ?default_surname) AS ?surname) } +GROUP BY ?subject """ [[attributes]] # date_of_birth diff --git a/apis_core/apis_entities/rdfimport/E74_GroupFromDNB.toml b/apis_core/apis_entities/rdfimport/E74_GroupFromDNB.toml index ffc5316b1..c7d899352 100644 --- a/apis_core/apis_entities/rdfimport/E74_GroupFromDNB.toml +++ b/apis_core/apis_entities/rdfimport/E74_GroupFromDNB.toml @@ -10,10 +10,13 @@ sparql = """ PREFIX gndo: SELECT ?label WHERE { - ?subject gndo:preferredNameForTheCorporateBody ?name + OPTIONAL { + ?subject gndo:preferredNameForTheCorporateBody ?name + } OPTIONAL { ?subject gndo:variantNameForTheCorporateBody ?altName } BIND(COALESCE(?name, ?altName) AS ?label) } +GROUP BY ?subject """ diff --git a/apis_core/utils/rdf.py b/apis_core/utils/rdf.py index eda6b6796..c8ae90c84 100644 --- a/apis_core/utils/rdf.py +++ b/apis_core/utils/rdf.py @@ -4,6 +4,7 @@ import importlib import logging import re +from collections import defaultdict from typing import Tuple from rdflib import Graph @@ -66,7 +67,7 @@ def get_definition_and_attributes_from_uri( matching_definition = definition matching_definition["filename"] = str(key) break - model_attributes = dict() + model_attributes = defaultdict(list) if matching_definition: attributes = matching_definition.get("attributes", []) sparql_attributes = list(filter(lambda d: d.get("sparql"), attributes)) @@ -75,7 +76,7 @@ def get_definition_and_attributes_from_uri( for binding in result.bindings: # {rdflib.term.Variable('somekey'): rdflib.term.Literal('some value')} for key, value in binding.items(): - model_attributes[str(key)] = str(value) + model_attributes[str(key)].append(str(value)) else: raise AttributeError(f"No matching definition found for {uri}") return matching_definition, model_attributes diff --git a/apis_core/utils/test_rdf.py b/apis_core/utils/test_rdf.py index b1363ca52..26a9a61cf 100644 --- a/apis_core/utils/test_rdf.py +++ b/apis_core/utils/test_rdf.py @@ -31,9 +31,9 @@ class Meta: class RdfTest(TestCase): def test_get_definition_from_dict_place_from_geonames(self): achensee = { - "latitude": "47.5", - "longitude": "11.7", - "label": "Achensee", + "latitude": ["47.5"], + "longitude": ["11.7"], + "label": ["Achensee"], } # https://www.geonames.org/2783029/achensee.html uri = str(testdata / "achensee.rdf") @@ -43,7 +43,11 @@ def test_get_definition_from_dict_place_from_geonames(self): self.assertEqual(achensee, attributes) def test_get_definition_from_dict_place_from_dnb(self): - wien = {"label": "Wien", "latitude": "048.208199", "longitude": "016.371690"} + wien = { + "label": ["Wien"], + "latitude": ["048.208199"], + "longitude": ["016.371690"], + } # https://d-nb.info/gnd/4066009-6 uri = str(testdata / "wien.rdf") @@ -53,21 +57,30 @@ def test_get_definition_from_dict_place_from_dnb(self): def test_get_definition_from_dict_person_from_dnb(self): pierre = { - "forename": "Pierre", - "surname": "Ramus", - "date_of_birth": "1882-04-15", - "date_of_death": "1942", + "forename": ["Pierre"], + "surname": ["Ramus"], + "alternative_names": [ + "Ramus, Pʹer", + "Großmann, Rudolf", + "Grossmann, Rudolf", + "Grossman, Rudolf", + "Grossman, Rodolphe", + "Grossmann, Rodolphe", + "Libertarian, ...", + ], + "date_of_birth": ["1882-04-15"], + "date_of_death": ["1942"], } # https://d-nb.info/gnd/118833197 uri = str(testdata / "ramus.rdf") person = Person() defintion, attributes = rdf.get_definition_and_attributes_from_uri(uri, person) - self.assertEqual(pierre, attributes) + self.assertEqual(pierre, dict(attributes)) def test_get_definition_from_dict_institution_from_dnb(self): pierre_ges = { - "label": "Pierre-Ramus-Gesellschaft", + "label": ["Pierre-Ramus-Gesellschaft"], } # https://d-nb.info/gnd/415006-5 uri = str(testdata / "ramus_gesellschaft.rdf") @@ -76,11 +89,11 @@ def test_get_definition_from_dict_institution_from_dnb(self): defintion, attributes = rdf.get_definition_and_attributes_from_uri( uri, institution ) - self.assertEqual(pierre_ges, attributes) + self.assertEqual(pierre_ges, dict(attributes)) def test_get_definition_from_dict_institution_from_dnb2(self): pierre_ges = { - "label": "Akademie der Wissenschaften in Wien", + "label": ["Akademie der Wissenschaften in Wien"], } # https://d-nb.info/gnd/35077-1 uri = str(testdata / "oeaw.rdf") @@ -89,4 +102,4 @@ def test_get_definition_from_dict_institution_from_dnb2(self): defintion, attributes = rdf.get_definition_and_attributes_from_uri( uri, institution ) - self.assertEqual(pierre_ges, attributes) + self.assertEqual(pierre_ges, dict(attributes))