From bd0d617ab16d3e5938c2683bb7d86d9dbb784b3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Schl=C3=B6gl?= Date: Fri, 13 Dec 2024 09:09:58 +0100 Subject: [PATCH] fix: add logic for sameAs resolves #401 --- apis_ontology/importers.py | 40 +++++++++++--- .../rdfimport/InstitutionFromDNB.toml | 10 ++++ apis_ontology/rdfimport/PersonFromDNB.toml | 10 ++++ apis_ontology/rdfimport/PlaceFromDNB.toml | 53 +++++++++++++++++++ 4 files changed, 106 insertions(+), 7 deletions(-) create mode 100644 apis_ontology/rdfimport/PlaceFromDNB.toml diff --git a/apis_ontology/importers.py b/apis_ontology/importers.py index cafccdf..5b03765 100644 --- a/apis_ontology/importers.py +++ b/apis_ontology/importers.py @@ -1,6 +1,9 @@ from django.apps import apps +from django.core.exceptions import ImproperlyConfigured +from django.db.utils import IntegrityError from apis_core.generic.importers import GenericModelImporter from apis_core.utils.helpers import create_object_from_uri +from apis_core.apis_metainfo.models import Uri class BaseEntityImporter(GenericModelImporter): @@ -9,19 +12,38 @@ class BaseEntityImporter(GenericModelImporter): def create_instance(self): data = self.get_data(drop_unknown_fields=False) + if "sameas" in data: + data["sameas"] = data["sameas"].split("|") + sa = Uri.objects.filter(uri__in=data["sameas"]) + if sa.count() == 1: + return sa.first().root_object + elif sa.count() > 1: + raise IntegrityError( + f"Multiple objects found for sameAs URIs {data['sameas']}. " + f"This indicates a data integrity problem as these URIs should be unique." 
+ ) modelfields = [field.name for field in self.model._meta.fields] data_croped = {key: data[key] for key in data if key in modelfields} subj = self.model.objects.create(**data_croped) + if "sameas" in data: + for uri in data["sameas"]: + Uri.objects.create(uri=uri, root_object_id=subj.id) related_keys = [ (x, x.split("__")[1], x.split("__")[2]) for x in data.keys() if "__" in x ] - for rk in related_keys: - key, obj, rel = rk - RelatedModel = apps.get_model("apis_ontology", obj) - RelationType = apps.get_model("apis_ontology", rel) - if key in data: - related_obj = create_object_from_uri(data[key], RelatedModel) - RelationType.objects.create(subj=subj, obj=related_obj) + try: + for rk in related_keys: + key, obj, rel = rk + RelatedModel = apps.get_model("apis_ontology", obj) + RelationType = apps.get_model("apis_ontology", rel) + if key in data: + related_obj = create_object_from_uri(data[key], RelatedModel) + RelationType.objects.create(subj=subj, obj=related_obj) + except: # noqa: E722 + subj.delete() + raise ImproperlyConfigured( + f"Error in creating related Objects for {self.model.__name__}" + ) return subj @@ -39,3 +61,7 @@ def mangle_data(self, data): class InstitutionImporter(BaseEntityImporter): pass + + +class PlaceImporter(BaseEntityImporter): + pass diff --git a/apis_ontology/rdfimport/InstitutionFromDNB.toml b/apis_ontology/rdfimport/InstitutionFromDNB.toml index 50f9444..6120a2f 100644 --- a/apis_ontology/rdfimport/InstitutionFromDNB.toml +++ b/apis_ontology/rdfimport/InstitutionFromDNB.toml @@ -30,3 +30,13 @@ WHERE { ?subject gndo:dateOfTermination ?end_date_written } """ +[[attributes]] +# sameAs +sparql = ''' +PREFIX owl: <http://www.w3.org/2002/07/owl#> +SELECT (GROUP_CONCAT(?sameas_pre; separator='|') as ?sameas) +WHERE { + ?subject owl:sameAs ?sameas_pre +} +GROUP BY ?subject +''' diff --git a/apis_ontology/rdfimport/PersonFromDNB.toml b/apis_ontology/rdfimport/PersonFromDNB.toml index 989ec81..e020764 100644 --- a/apis_ontology/rdfimport/PersonFromDNB.toml +++ 
b/apis_ontology/rdfimport/PersonFromDNB.toml @@ -90,3 +90,13 @@ WHERE { ?subject gndo:placeOfDeath ?place_of_death__Place__StarbIn } """ +[[attributes]] +# sameAs +sparql = ''' +PREFIX owl: <http://www.w3.org/2002/07/owl#> +SELECT (GROUP_CONCAT(?sameas_pre; separator='|') as ?sameas) +WHERE { + ?subject owl:sameAs ?sameas_pre +} +GROUP BY ?subject +''' diff --git a/apis_ontology/rdfimport/PlaceFromDNB.toml b/apis_ontology/rdfimport/PlaceFromDNB.toml new file mode 100644 index 0000000..78c7b79 --- /dev/null +++ b/apis_ontology/rdfimport/PlaceFromDNB.toml @@ -0,0 +1,53 @@ +##################################################### +# Create an E53_Place from a d-nb.info RDF endpoint # +# the second regex is for testing +regex = "https://d-nb.info.*|/.*wien.rdf" +superclass = "apis_ontology.models.Place" +sameas = """ +PREFIX owl: <http://www.w3.org/2002/07/owl#> +SELECT ?sameas +WHERE { +?subject owl:sameAs ?sameas +} +""" +[[attributes]] +# label +sparql = """ +PREFIX gndo: <https://d-nb.info/standards/elementset/gnd#> +SELECT ?label +WHERE { + ?subject gndo:preferredNameForThePlaceOrGeographicName ?label +} +""" +[[attributes]] +# longitude +sparql = ''' +PREFIX geo: <http://www.opengis.net/ont/geosparql#> +SELECT ?longitude +WHERE { + ?subject geo:hasGeometry ?geo1 . + ?geo1 geo:asWKT ?point . + BIND(REPLACE(str(?point), "Point \\( \\+?(-?\\d+.\\d+).*", "$1") as ?longitude) + } +''' +[[attributes]] +# latitude +sparql = ''' +PREFIX geo: <http://www.opengis.net/ont/geosparql#> +SELECT ?latitude +WHERE { + ?subject geo:hasGeometry ?geo1 . + ?geo1 geo:asWKT ?point . + BIND(REPLACE(str(?point), "^Point\\s*\\(\\s*[+-]?\\d+\\.\\d+\\s+([+-]?\\d+\\.\\d+)\\s*\\)$", "$1") as ?latitude) + } +''' +[[attributes]] +# sameAs +sparql = ''' +PREFIX owl: <http://www.w3.org/2002/07/owl#> +SELECT (GROUP_CONCAT(?sameas_pre; separator='|') as ?sameas) +WHERE { + ?subject owl:sameAs ?sameas_pre +} +GROUP BY ?subject +'''