From 4f96182fd704ede07a41db60e09560439d103428 Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Mon, 16 Oct 2023 15:42:49 +0200 Subject: [PATCH] refactor: replace yaml with toml for rdf parser --- .../RDF_default_settings.toml | 244 ++++++++++++++++++ apis_core/utils/rdf.py | 4 +- apis_core/utils/settings.py | 2 +- 3 files changed, 247 insertions(+), 3 deletions(-) create mode 100644 apis_core/default_settings/RDF_default_settings.toml diff --git a/apis_core/default_settings/RDF_default_settings.toml b/apis_core/default_settings/RDF_default_settings.toml new file mode 100644 index 000000000..30e01657f --- /dev/null +++ b/apis_core/default_settings/RDF_default_settings.toml @@ -0,0 +1,244 @@ +######################################### +# Create an entity `apis_ontology.Place` +# from a geonames RDF endpoint +########################################## +[PlaceFromGeonames] +model = "apis_ontology.Place" +filter_sparql = """ +PREFIX gn: +ASK { + ?subject gn:featureClass . + ?subject gn:featureCode . +} +""" +[[PlaceFromGeonames.attributes]] +# name +sparql = """ +PREFIX gn: +SELECT ?name +WHERE +{ + ?subject gn:name|gn:officialName|gn:alternateName ?prefName + FILTER (LANGMATCHES(LANG(?prefName), "de") || LANGMATCHES(LANG(?prefName), "en") || LANG(?prefName) = "") + BIND(?prefName AS ?name) +} +ORDER BY ?lang +""" +[[PlaceFromGeonames.attributes]] +# alternative_label +sparql = """ +PREFIX gn: +SELECT ?altName (LANG(?altName) AS ?lang) +WHERE { + ?subject gn:alternateName ?altName + FILTER (LANGMATCHES(LANG(?altName), "de") || LANGMATCHES(LANG(?altName), "en") || LANG(?altName) = "") +} +""" +[[PlaceFromGeonames.attributes]] +# kind +sparql = """ +PREFIX gn: +SELECT ?kind +WHERE { + ?subject gn:featureCode ?kind +} +""" +[[PlaceFromGeonames.attributes]] +# lat +sparql = """ +PREFIX wgs84_pos: +SELECT ?lat +WHERE { + ?subject wgs84_pos:lat ?lat. 
+ ?subject wgs84_pos:long ?long +} +""" +[[PlaceFromGeonames.attributes]] +# long +sparql = """ +PREFIX wgs84_pos: +SELECT ?long +WHERE { + ?subject wgs84_pos:lat ?lat. + ?subject wgs84_pos:long ?long +} +""" +[[PlaceFromGeonames.attributes]] +# parent +sparql = """ +PREFIX gn: +SELECT ?parent +WHERE { + ?subject gn:parentCountry ?parent +} +""" + +######################################### +# Create an entity `apis_ontology.Place` +# from a d-nb.info RDF endpoint +######################################### +[PlaceFromDNB] +model = "apis_ontology.Place" +filter_sparql = """ +PREFIX gndo: +ASK { + ?subject gndo:preferredNameForThePlaceOrGeographicName ?object . +} +""" +[[PlaceFromDNB.attributes]] +# name +sparql = """ +PREFIX gndo: +SELECT ?name +WHERE { + ?subject gndo:preferredNameForThePlaceOrGeographicName ?prefName + BIND(?prefName AS ?name) +} +""" +[[PlaceFromDNB.attributes]] +# lon +sparql = """ +PREFIX geo: +SELECT ?lon +WHERE { + ?subject geo:hasGeometry ?geo1 . + ?geo1 geo:asWKT ?point . + BIND(REPLACE(str(?point), "Point . \\+(\\d+\\.\\d+) .+$", "$1") as ?lon) + } +""" +[[PlaceFromDNB.attributes]] +# lat +sparql = """ +PREFIX geo: +SELECT ?lat +WHERE { + ?subject geo:hasGeometry ?geo1 . + ?geo1 geo:asWKT ?point . + BIND(REPLACE(str(?point), "Point . \\+(\\d+\\.\\d+) \\+(\\d+\\.\\d+) .$", "$2") as ?lat) + } +""" + +######################################### +# Create an entity `apis_ontology.Person` +# from a d-nb.info RDF endpoint +######################################### +[PersonFromDNB] +model = "apis_ontology.Person" +filter_sparql = """ +PREFIX gndo: +ASK { + ?subject gndo:preferredNameForThePerson ?object . +} +""" +[[PersonFromDNB.attributes]] +# name +sparql = """ +PREFIX gndo: +SELECT ?name +WHERE { + ?subject gndo:preferredNameForThePerson ?name . + OPTIONAL { + ?subject gndo:preferredNameEntityForThePerson ?med . + ?med gndo:forename ?first_name. + ?med gndo:surname ?name2 . 
+ BIND(?name2 as ?name) + } + BIND(CONCAT(?name, ",", ?first_name) AS ?name) +} +""" +[[PersonFromDNB.attributes]] +# profession +sparql = """ +PREFIX gndo: +SELECT ?profession +WHERE { + ?subject gndo:professionOrOccupation ?profession +} +""" +[[PersonFromDNB.attributes]] +# date_of_birth +sparql = """ +PREFIX gndo: +SELECT ?date_of_birth +WHERE { + ?subject gndo:dateOfBirth ?start_date_written + BIND(?start_date_written AS ?date_of_birth) +} +""" +[[PersonFromDNB.attributes]] +# date_of_death +sparql = """ +PREFIX gndo: +SELECT ?date_of_death +WHERE { + ?subject gndo:dateOfDeath ?end_date_written + BIND(?end_date_written AS ?date_of_death) +} +""" +[[PersonFromDNB.attributes]] +# place_of_birth +sparql = """ +PREFIX gndo: +SELECT ?place_of_birth +WHERE { + ?subject gndo:placeOfBirth ?place_of_birth +} +""" + +############################################## +# Create an entity `apis_ontology.Institution` +# from a d-nb.info RDF endpoint +############################################## +[InstitutionFromDNB] +model = "apis_ontology.Institution" +filter_sparql = """ +PREFIX gndo: +ASK { + ?subject gndo:preferredNameForTheCorporateBody ?object . 
+} +""" +[[InstitutionFromDNB.attributes]] +# name +sparql = """ +PREFIX gndo: +SELECT ?name +WHERE { + ?subject gndo:preferredNameForTheCorporateBody ?name +} +""" +[[InstitutionFromDNB.attributes]] +# altName +sparql = """ +PREFIX gndo: +SELECT ?altName +WHERE { + ?subject gndo:variantNameForTheCorporateBody ?altName +} +""" +[[InstitutionFromDNB.attributes]] +# place +sparql = """ +PREFIX gndo: +SELECT ?place +WHERE { + ?subject gndo:placeOfBusiness ?place +} +""" +[[InstitutionFromDNB.attributes]] +# start_date_written +sparql = """ +PREFIX gndo: +SELECT ?start_date_written +WHERE { + ?subject gndo:dateOfEstablishment ?start_date_written +} +""" +[[InstitutionFromDNB.attributes]] +# end_date_written +sparql = """ +PREFIX gndo: +SELECT ?end_date_written +WHERE { + ?subject gndo:dateOfTermination ?end_date_written +} +""" diff --git a/apis_core/utils/rdf.py b/apis_core/utils/rdf.py index 3c5af593a..10fb0a9ba 100644 --- a/apis_core/utils/rdf.py +++ b/apis_core/utils/rdf.py @@ -3,8 +3,8 @@ import pathlib import logging +import tomllib -from yaml import safe_load from rdflib import Graph from typing import Tuple @@ -24,7 +24,7 @@ def get_modelname_and_dict_from_uri( graph.parse(uri) settings_file = settings_file or rdf_object_mapping_file() - settings = safe_load(settings_file.read_text()) + settings = tomllib.loads(settings_file.read_text()) matching_definition = None for key, definition in settings.items(): if set(definition_must_have_keys) <= set(definition.keys()): diff --git a/apis_core/utils/settings.py b/apis_core/utils/settings.py index e3a6e6d78..f5571eaa9 100644 --- a/apis_core/utils/settings.py +++ b/apis_core/utils/settings.py @@ -17,7 +17,7 @@ def clean_uri_mapping_file() -> Path: def rdf_object_mapping_file() -> Path: - default = default_settings() / "RDF_default_settings.yml" + default = default_settings() / "RDF_default_settings.toml" mapping_file = getattr(settings, "APIS_RDF_YAML_SETTINGS", default) return Path(mapping_file)