From dc0f3f09d891f593da37c9bb462029f59640b1c5 Mon Sep 17 00:00:00 2001 From: csae8092 Date: Sun, 14 Jan 2024 10:35:40 +0100 Subject: [PATCH] mint wikidata for geonames added collection to filter --- apis_core/apis_entities/list_view_event.py | 3 + .../apis_entities/list_view_institution.py | 3 + apis_core/apis_entities/list_view_person.py | 3 + apis_core/apis_entities/list_view_place.py | 3 + apis_core/apis_entities/list_view_work.py | 3 + dumper/management/commands/wikidata_minter.py | 81 ++++++++++--------- 6 files changed, 59 insertions(+), 37 deletions(-) diff --git a/apis_core/apis_entities/list_view_event.py b/apis_core/apis_entities/list_view_event.py index 652b167..b965405 100644 --- a/apis_core/apis_entities/list_view_event.py +++ b/apis_core/apis_entities/list_view_event.py @@ -8,6 +8,7 @@ from apis_core.apis_entities.models import Event from apis_core.apis_entities.base_filter import MyBaseFilter +from apis_core.apis_metainfo.models import Collection from apis_core.apis_vocabularies.models import ( EventEventRelation, EventType, @@ -87,6 +88,7 @@ class EventListFilter(MyBaseFilter): url="/apis/vocabularies/autocomplete/eventtype/normal/", ), ) + collection = django_filters.ModelChoiceFilter(queryset=Collection.objects.all()) def related_work_filter(self, qs, name, value): rels = get_child_classes( @@ -142,6 +144,7 @@ def __init__(self, *args, **kwargs): "name", "kind", "year_of_creation", + "collection", css_id="more", ), AccordionGroup( diff --git a/apis_core/apis_entities/list_view_institution.py b/apis_core/apis_entities/list_view_institution.py index 9ad3ef6..67fa71e 100644 --- a/apis_core/apis_entities/list_view_institution.py +++ b/apis_core/apis_entities/list_view_institution.py @@ -7,6 +7,7 @@ from dal import autocomplete from apis_core.apis_entities.models import Institution +from apis_core.apis_metainfo.models import Collection from apis_core.apis_entities.base_filter import MyBaseFilter from apis_core.apis_vocabularies.models import ( InstitutionEventRelation, @@ -93,6 +94,7 @@ class InstitutionListFilter(MyBaseFilter): url="/apis/vocabularies/autocomplete/institutiontype/normal/", ), ) + collection = django_filters.ModelChoiceFilter(queryset=Collection.objects.all()) def related_event_filter(self, qs, name, value): rels = get_child_classes( @@ -161,6 +163,7 @@ def __init__(self, *args, **kwargs): "name", "kind", "year_of_creation", + "collection", css_id="more", ), AccordionGroup( diff --git a/apis_core/apis_entities/list_view_person.py b/apis_core/apis_entities/list_view_person.py index 21cadc0..39cbe67 100644 --- a/apis_core/apis_entities/list_view_person.py +++ b/apis_core/apis_entities/list_view_person.py @@ -8,6 +8,7 @@ from apis_core.apis_entities.models import Person from apis_core.apis_entities.base_filter import MyBaseFilter +from apis_core.apis_metainfo.models import Collection from apis_core.apis_vocabularies.models import ( PersonInstitutionRelation, PersonPersonRelation, @@ -99,6 +100,7 @@ class PersonListFilter(MyBaseFilter): help_text="Name einer Institution und die Art des Beziehung, z.B. 'Znanie' und 'besitzt'", method="related_institution_filter", ) + collection = django_filters.ModelChoiceFilter(queryset=Collection.objects.all()) def related_work_filter(self, qs, name, value): rels = get_child_classes( @@ -163,6 +165,7 @@ def __init__(self, *args, **kwargs): "gender", "birth_year", "death_year", + "collection", css_id="more", ), AccordionGroup( diff --git a/apis_core/apis_entities/list_view_place.py b/apis_core/apis_entities/list_view_place.py index 53e0552..f6c12b2 100644 --- a/apis_core/apis_entities/list_view_place.py +++ b/apis_core/apis_entities/list_view_place.py @@ -8,6 +8,7 @@ from apis_core.apis_entities.models import Place from apis_core.apis_entities.base_filter import MyBaseFilter +from apis_core.apis_metainfo.models import Collection from apis_core.apis_vocabularies.models import ( PersonPlaceRelation, PlaceType, @@ -65,6 +66,7 @@ class PlaceListFilter(MyBaseFilter): url="/apis/vocabularies/autocomplete/placetype/normal/", ), ) + collection = django_filters.ModelChoiceFilter(queryset=Collection.objects.all()) def related_work_filter(self, qs, name, value): rels = get_child_classes( @@ -112,6 +114,7 @@ def __init__(self, *args, **kwargs): "Beziehungen", "related_with_person", "related_with_work", + "collection", css_id="admin_search", ), ) diff --git a/apis_core/apis_entities/list_view_work.py b/apis_core/apis_entities/list_view_work.py index e0bb2b8..ea67746 100644 --- a/apis_core/apis_entities/list_view_work.py +++ b/apis_core/apis_entities/list_view_work.py @@ -8,6 +8,7 @@ from apis_core.apis_entities.models import Work from apis_core.apis_entities.base_filter import MyBaseFilter +from apis_core.apis_metainfo.models import Collection from apis_core.apis_vocabularies.models import ( InstitutionWorkRelation, PersonWorkRelation, @@ -76,6 +77,7 @@ class WorkListFilter(MyBaseFilter): url="/apis/vocabularies/autocomplete/worktype/normal/", ), ) + collection = django_filters.ModelChoiceFilter(queryset=Collection.objects.all()) def related_work_filter(self, qs, name, value): rels = get_child_classes( @@ -117,6 +119,7 @@ def __init__(self, *args, **kwargs): "name", "kind", "year_of_creation", + "collection", css_id="more", ), AccordionGroup( diff --git a/dumper/management/commands/wikidata_minter.py b/dumper/management/commands/wikidata_minter.py index b05c739..795aca9 100644 --- a/dumper/management/commands/wikidata_minter.py +++ b/dumper/management/commands/wikidata_minter.py @@ -2,7 +2,7 @@ import time from datetime import datetime -from acdh_id_reconciler import gnd_to_wikidata +from acdh_id_reconciler import gnd_to_wikidata, geonames_to_gnd from AcdhArcheAssets.uri_norm_rules import get_normalized_uri from django.conf import settings from django.core.management.base import BaseCommand @@ -16,43 +16,50 @@ class Command(BaseCommand): help = "mint WikiData IDs for GND-URIs" def handle(self, *args, **kwargs): - start_time = datetime.now().strftime(settings.PMB_TIME_PATTERN) LIMIT = 100 USER_AGENT_PMB = "pmb (https://pmb.acdh.oeaw.ac.at)" col, _ = Collection.objects.get_or_create(name="No WikiData-ID found") + types = ["d-nb.info", "geonames"] + for uri_type in types: + print(f"processing URIS with type: {uri_type}") + start_time = datetime.now().strftime(settings.PMB_TIME_PATTERN) + ents = ( + TempEntityClass.objects.filter(uri__uri__icontains=uri_type) + .exclude(uri__uri__icontains="wikidata") + .exclude(collection=col) + ) + uris_to_process = Uri.objects.filter(entity__in=ents).filter( + uri__icontains=uri_type + ) - ents = ( - TempEntityClass.objects.filter(uri__uri__icontains="d-nb.info") - .exclude(uri__uri__icontains="wikidata") - .exclude(collection=col) - ) - uris_to_process = Uri.objects.filter(entity__in=ents).filter( - uri__icontains="d-nb.info" - ) - - print(f"All in all {uris_to_process.count()} GND-Entities without Wikidata") - for x in tqdm(uris_to_process.order_by("id")[:LIMIT], total=LIMIT): - time.sleep(1) - ent = x.entity - try: - results = gnd_to_wikidata(x.uri, USER_AGENT_PMB) - except Exception as e: - print(x, ent.id, e) - ent.collection.add(col) - continue - wd_url = get_normalized_uri(results["wikidata"]) - wd_uri, _ = Uri.objects.get_or_create(uri=wd_url) - wd_uri.entity = ent - wd_uri.domain = "wikidata" - wd_uri.save() - ents = TempEntityClass.objects.filter(uri__uri__icontains="d-nb.info").exclude( - uri__uri__icontains="wikidata" - ) - uris_to_process = Uri.objects.filter(entity__in=ents).filter( - uri__icontains="d-nb.info" - ) - mgs = f"{uris_to_process.count()} left" - print(mgs) - end_time = datetime.now().strftime(settings.PMB_TIME_PATTERN) - report = [os.path.basename(__file__), start_time, end_time] - write_report(report) + print( + f"All in all {uris_to_process.count()} {uri_type}-Entities without Wikidata" + ) + for x in tqdm(uris_to_process.order_by("id")[:LIMIT], total=LIMIT): + time.sleep(1) + ent = x.entity + try: + if uri_type == "d-nb.info": + results = gnd_to_wikidata(x.uri, USER_AGENT_PMB) + else: + results = geonames_to_gnd(x.uri, USER_AGENT_PMB) + except Exception as e: + print(x, ent.id, e) + ent.collection.add(col) + continue + wd_url = get_normalized_uri(results["wikidata"]) + wd_uri, _ = Uri.objects.get_or_create(uri=wd_url) + wd_uri.entity = ent + wd_uri.domain = "wikidata" + wd_uri.save() + ents = TempEntityClass.objects.filter(uri__uri__icontains=uri_type).exclude( + uri__uri__icontains="wikidata" + ) + uris_to_process = Uri.objects.filter(entity__in=ents).filter( + uri__icontains=uri_type + ) + mgs = f"{uris_to_process.count()} left" + print(mgs) + end_time = datetime.now().strftime(settings.PMB_TIME_PATTERN) + report = [os.path.basename(__file__), start_time, end_time] + write_report(report)