Skip to content

Commit

Permalink
Merge pull request #40 from arthur-schnitzler/main
Browse files Browse the repository at this point in the history
added mgm-cmd to add gn_feature codes
  • Loading branch information
csae8092 authored Jan 13, 2024
2 parents 9c561aa + d344428 commit 795f1f1
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 3 deletions.
5 changes: 3 additions & 2 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[run]
branch = True
omit = dumper/management/commands/labels_to_uris.py

omit =
dumper/management/commands/labels_to_uris.py
dumper/management/commands/add_gn_feature_codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
</tr>
<tr>
<th>Ortstype</th>
<td>{{ object.kind }}</td>
<td>{% if object.kind.description %} {{ object.kind }} <small>{{ object.kind.description }}</small> {% else %} {{ object.kind }} {% endif %}</td>
</tr>
</table>

Expand Down
1 change: 1 addition & 0 deletions crontab
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
1 3 * * * root cd /opt/app && /usr/local/bin/python3 manage.py wikipedia_minter >> /var/log/cron.log 2>&1
30 3 * * * root cd /opt/app && /usr/local/bin/python3 manage.py wikidata_minter >> /var/log/cron.log 2>&1
1 4 * * * root cd /opt/app && /usr/local/bin/python3 manage.py dump_entities >> /var/log/cron.log 2>&1
1 6 * * * root cd /opt/app && /usr/local/bin/python3 manage.py add_gn_feature_codes >> /var/log/cron.log 2>&1
#
56 changes: 56 additions & 0 deletions dumper/management/commands/add_gn_feature_codes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import os
from tqdm import tqdm

from datetime import datetime

from acdh_geonames_utils.gn_client import gn_as_object
from django.conf import settings
from django.core.management.base import BaseCommand

from apis_core.apis_entities.models import Place
from apis_core.apis_vocabularies.models import PlaceType
from dumper.utils import write_report


class Command(BaseCommand):
    """Set the kind of places that have a GeoNames URI to their feature code.

    A run selects places whose URI domain contains "geonames" and whose
    current kind description does not contain "geonames", and processes at
    most ``BATCH_SIZE`` of them. For every selected place the feature code is
    looked up via ``gn_as_object`` and a ``PlaceType`` named after that code
    is fetched or created and stored as the place's kind. Start and end time
    of the run are handed to ``write_report``.
    """

    help = "adds geonames feature codes to places with geoname uris"

    # Upper bound of places handled per invocation.
    BATCH_SIZE = 250
    # PlaceType name used when no feature code could be determined.
    FALLBACK_CODE = "kein passender Code gefunden"

    def handle(self, *args, **kwargs):
        """Run one batch; positional and keyword arguments are not used."""
        start_time = datetime.now().strftime(settings.PMB_TIME_PATTERN)
        print("start adding geoname feature codes")
        cols = ["id", "uri__domain", "uri__uri", "kind__description"]
        places = (
            Place.objects.filter(uri__domain__icontains="geonames")
            .exclude(kind__description__icontains="geonames")
            .values_list(*cols)
        )
        # NOTE(review): a place with several matching URIs presumably shows up
        # once per URI in this values_list -- confirm whether that is intended.
        last_place_id = None
        for row in tqdm(places[: self.BATCH_SIZE]):
            place = Place.objects.get(id=row[0])
            gn_uri = row[2]
            try:
                # NOTE(review): gn_as_object presumably performs a remote
                # lookup; any failure (or a result without a feature code)
                # falls back to the placeholder instead of aborting the run.
                code = gn_as_object(gn_uri)["feature code"]
            except Exception:
                code = self.FALLBACK_CODE
            try:
                place_type, _ = PlaceType.objects.get_or_create(name=code)
            except Exception:
                # Best effort, e.g. when several PlaceTypes share this name:
                # prefer one that already carries a description.
                place_type = (
                    PlaceType.objects.filter(name=code)
                    .exclude(description=None)
                    .first()
                )
            last_place_id = place.id
            if place_type is None:
                # Nothing usable found; keep the existing kind rather than
                # overwriting it with None.
                continue
            place.kind = place_type
            place.save()
        # Only report the last handled place when there was one; an empty
        # selection must not crash the command.
        if last_place_id is not None:
            print(last_place_id)
        end_time = datetime.now().strftime(settings.PMB_TIME_PATTERN)
        report = [os.path.basename(__file__), start_time, end_time]
        write_report(report)
110 changes: 110 additions & 0 deletions fetch_gn_feature_codes__issue__79.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "080b69c6",
"metadata": {},
"source": [
"# fetching GeoNames feature code descriptions"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "85431463",
"metadata": {},
"outputs": [],
"source": [
"from acdh_tei_pyutils.tei import TeiReader\n",
"from acdh_tei_pyutils.utils import extract_fulltext\n",
"from tqdm import tqdm"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3ac05c4",
"metadata": {},
"outputs": [],
"source": [
"nsmap = {\n",
" \"gn\": \"https://www.geonames.org/ontology#\",\n",
" \"skos\": \"http://www.w3.org/2004/02/skos/core#\",\n",
" \"rdf\": \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b393e39d",
"metadata": {},
"outputs": [],
"source": [
"doc = TeiReader(\"https://www.geonames.org/ontology/ontology_v3.3.rdf\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00e1887c",
"metadata": {},
"outputs": [],
"source": [
"place_types = PlaceType.objects.filter(name__icontains=\"http\")\n",
"for x in tqdm(place_types):\n",
" name = x.name.split('#')[-1]\n",
" x.name = name\n",
" x.save() "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "04d1cd5f",
"metadata": {},
"outputs": [],
"source": [
"place_types = PlaceType.objects.exclude(name__icontains=\"(\").filter(name__icontains=\".\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab9370d4",
"metadata": {},
"outputs": [],
"source": [
"for x in tqdm(place_types):\n",
" xpath_expr = f'.//gn:Code[@rdf:about=\"#{x.name}\"]/skos:definition[@xml:lang=\"en\"]'\n",
" try:\n",
" description = doc.tree.xpath(xpath_expr, namespaces=nsmap)[0]\n",
" except IndexError:\n",
" continue\n",
" x.description = f\"{extract_fulltext(description).title()}. Quelle: https://www.geonames.org/ontology/ontology_v3.3.rdf\"\n",
" x.save()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 795f1f1

Please sign in to comment.