From 4f42978bc45273209de3008189b78ecf8d7c2b3f Mon Sep 17 00:00:00 2001 From: csae8092 Date: Tue, 23 Jan 2024 08:11:39 +0100 Subject: [PATCH 1/4] closes #111 and better image URLS --- .../management/commands/fetch_images.py | 2 +- .../detail_views/entity_detail_generic.html | 5 + apis_core/apis_entities/tests.py | 6 + ...mpentityclass_img_last_checked_and_more.py | 6 +- apis_core/apis_metainfo/models.py | 7 + issue__112_fix_image_urls.ipynb | 138 ++++++++++++++++++ requirements.txt | 2 +- 7 files changed, 162 insertions(+), 4 deletions(-) create mode 100644 issue__112_fix_image_urls.ipynb diff --git a/apis_core/apis_entities/management/commands/fetch_images.py b/apis_core/apis_entities/management/commands/fetch_images.py index b1a5491..0e77424 100644 --- a/apis_core/apis_entities/management/commands/fetch_images.py +++ b/apis_core/apis_entities/management/commands/fetch_images.py @@ -10,7 +10,7 @@ from apis_core.apis_entities.models import Person from dumper.utils import write_report -warnings.filterwarnings('ignore') +warnings.filterwarnings("ignore") class Command(BaseCommand): diff --git a/apis_core/apis_entities/templates/apis_entities/detail_views/entity_detail_generic.html b/apis_core/apis_entities/templates/apis_entities/detail_views/entity_detail_generic.html index e05712c..e481a2f 100644 --- a/apis_core/apis_entities/templates/apis_entities/detail_views/entity_detail_generic.html +++ b/apis_core/apis_entities/templates/apis_entities/detail_views/entity_detail_generic.html @@ -62,7 +62,12 @@

{% if object.img_url %}
Bild von {{ object }} + {% if object.img_credit %} +
{{ object.img_credit }}
+ {% endif %}
+ + {% endif %} {% block info-table %} diff --git a/apis_core/apis_entities/tests.py b/apis_core/apis_entities/tests.py index ed1bdc9..64b59a1 100644 --- a/apis_core/apis_entities/tests.py +++ b/apis_core/apis_entities/tests.py @@ -370,3 +370,9 @@ def test_026_fetch_image(self): entity = import_from_normdata(grillparzer, "person") entity.fetch_image() self.assertTrue(entity.img_url) + self.assertTrue("Wikimedia Commons", entity.img_credit) + + def test_027_img_credit(self): + entity = import_from_normdata("https://www.wikidata.org/wiki/Q76483", "person") + self.assertTrue(entity.img_url) + self.assertTrue("Wikimedia Commons", entity.img_credit) diff --git a/apis_core/apis_metainfo/migrations/0004_tempentityclass_img_last_checked_and_more.py b/apis_core/apis_metainfo/migrations/0004_tempentityclass_img_last_checked_and_more.py index e9fd600..cc2eb9b 100644 --- a/apis_core/apis_metainfo/migrations/0004_tempentityclass_img_last_checked_and_more.py +++ b/apis_core/apis_metainfo/migrations/0004_tempentityclass_img_last_checked_and_more.py @@ -13,8 +13,10 @@ class Migration(migrations.Migration): model_name="tempentityclass", name="img_last_checked", field=models.DateTimeField( - blank=True, null=True, verbose_name="geprüft am", - help_text="Datum an dem die Bild-URL eingetragen wurde." 
+ blank=True, + null=True, + verbose_name="geprüft am", + help_text="Datum an dem die Bild-URL eingetragen wurde.", ), ), migrations.AddField( diff --git a/apis_core/apis_metainfo/models.py b/apis_core/apis_metainfo/models.py index 1524d81..ef19c6c 100644 --- a/apis_core/apis_metainfo/models.py +++ b/apis_core/apis_metainfo/models.py @@ -155,6 +155,13 @@ def fetch_image(self): self.save() return self + def img_credit(self): + credit = None + if self.img_url is not None: + if "commons.wikimedia.org/w/index" in self.img_url: + credit = "Wikimedia Commons" + return credit + @classmethod def get_listview_url(self): entity = self.__name__.lower() diff --git a/issue__112_fix_image_urls.ipynb b/issue__112_fix_image_urls.ipynb new file mode 100644 index 0000000..18edf3b --- /dev/null +++ b/issue__112_fix_image_urls.ipynb @@ -0,0 +1,138 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 23, + "id": "eda7419e", + "metadata": {}, + "outputs": [], + "source": [ + "from urllib.parse import quote\n", + "from acdh_wikidata_pyutils import URL_STUB\n", + "from tqdm.notebook import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "f03b29bb", + "metadata": {}, + "outputs": [], + "source": [ + "items = TempEntityClass.objects.filter(img_url__icontains=\"width\").filter(img_url__icontains=\"commons.wikimedia.org/w/index\")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "8ec94f95", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e730f9bf96b4439e86ded04fdaa525f4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "for x in tqdm(items, total=len(items)):\n", + " img_name = x.img_url.split('/')[-1].split('&')[0]\n", + " new_url = URL_STUB.format(quote(img_name))\n", + " x.img_url = new_url\n", + " x.save()" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 26, + "id": "a8048e89", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://commons.wikimedia.org/w/index.php?title=Special:Redirect/file/{}'" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "URL_STUB" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "054659cb", + "metadata": {}, + "outputs": [], + "source": [ + "hansi = None" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "4b1ad510", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "argument of type 'NoneType' is not iterable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[28], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mURL_STUB\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mhansi\u001b[49m:\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124masd\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mTypeError\u001b[0m: argument of type 'NoneType' is not iterable" + ] + } + ], + "source": [ + "if URL_STUB in hansi:\n", + " print(\"asd\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "454dd98d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Django Shell-Plus", + "language": "python", + "name": "django_extensions" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 
+} diff --git a/requirements.txt b/requirements.txt index ab6f1b6..c1bcbaa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ acdh-django-browsing acdh_geonames_utils acdh-id-reconciler>=0.2,<1 acdh-tei-pyutils>=0.34,<1 -acdh-wikidata-pyutils>=0.5,<1 +acdh-wikidata-pyutils==1.0 apis-override-select2js==0.1 Django>4.1,<6 django-admin-csvexport From 4567899811184f0144bff114b2ffbff37f091791 Mon Sep 17 00:00:00 2001 From: csae8092 Date: Tue, 23 Jan 2024 08:17:36 +0100 Subject: [PATCH 2/4] fix in test --- apis_core/apis_entities/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apis_core/apis_entities/tests.py b/apis_core/apis_entities/tests.py index 64b59a1..8ebe687 100644 --- a/apis_core/apis_entities/tests.py +++ b/apis_core/apis_entities/tests.py @@ -375,4 +375,4 @@ def test_026_fetch_image(self): def test_027_img_credit(self): entity = import_from_normdata("https://www.wikidata.org/wiki/Q76483", "person") self.assertTrue(entity.img_url) - self.assertTrue("Wikimedia Commons", entity.img_credit) + self.assertIsNone(entity.img_credit) From aedc200960cf1b8ce9b14a63901880575555374f Mon Sep 17 00:00:00 2001 From: csae8092 Date: Tue, 23 Jan 2024 08:48:54 +0100 Subject: [PATCH 3/4] na bitte, geht doch --- apis_core/apis_entities/tests.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/apis_core/apis_entities/tests.py b/apis_core/apis_entities/tests.py index 8ebe687..f326fa9 100644 --- a/apis_core/apis_entities/tests.py +++ b/apis_core/apis_entities/tests.py @@ -370,9 +370,8 @@ def test_026_fetch_image(self): entity = import_from_normdata(grillparzer, "person") entity.fetch_image() self.assertTrue(entity.img_url) - self.assertTrue("Wikimedia Commons", entity.img_credit) + self.assertTrue("Wikimedia Commons" in entity.img_credit()) def test_027_img_credit(self): entity = import_from_normdata("https://www.wikidata.org/wiki/Q76483", "person") - self.assertTrue(entity.img_url) - 
self.assertIsNone(entity.img_credit) + self.assertIsNone(entity.img_credit()) From 6d67a1176e0d64da06fbd60d887168d6e6b4787d Mon Sep 17 00:00:00 2001 From: csae8092 Date: Tue, 23 Jan 2024 08:59:16 +0100 Subject: [PATCH 4/4] updated test workflow actions --- .github/workflows/test.yml | 4 +- issue__112_fix_image_urls.ipynb | 87 ++++++--------------------------- set_env_variables.sh | 2 +- 3 files changed, 18 insertions(+), 75 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6ac86e6..fa03400 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,10 +21,10 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python 3.11 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.11" - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: ${{ env.pythonLocation }} key: ${{ env.pythonLocation }}-${{ hashFiles('requirements.txt') }} diff --git a/issue__112_fix_image_urls.ipynb b/issue__112_fix_image_urls.ipynb index 18edf3b..9d5176a 100644 --- a/issue__112_fix_image_urls.ipynb +++ b/issue__112_fix_image_urls.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "eda7419e", "metadata": {}, "outputs": [], @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "f03b29bb", "metadata": {}, "outputs": [], @@ -24,85 +24,28 @@ }, { "cell_type": "code", - "execution_count": 25, - "id": "8ec94f95", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e730f9bf96b4439e86ded04fdaa525f4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "0it [00:00, ?it/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "for x in tqdm(items, total=len(items)):\n", - " img_name = x.img_url.split('/')[-1].split('&')[0]\n", - " new_url = URL_STUB.format(quote(img_name))\n", - " 
x.img_url = new_url\n", - " x.save()" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "a8048e89", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'https://commons.wikimedia.org/w/index.php?title=Special:Redirect/file/{}'" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "URL_STUB" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "054659cb", + "execution_count": null, + "id": "e8e31fb5", "metadata": {}, "outputs": [], "source": [ - "hansi = None" + "items.count()" ] }, { "cell_type": "code", - "execution_count": 28, - "id": "4b1ad510", + "execution_count": null, + "id": "8ec94f95", "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "argument of type 'NoneType' is not iterable", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[28], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mURL_STUB\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mhansi\u001b[49m:\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124masd\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mTypeError\u001b[0m: argument of type 'NoneType' is not iterable" - ] - } - ], + "outputs": [], "source": [ - "if URL_STUB in hansi:\n", - " print(\"asd\")" + "for x in tqdm(items, total=len(items)):\n", + " img_name = x.img_url.split('/')[-1].split('&')[0]\n", + " new_url = URL_STUB.format(quote(img_name))\n", + " if len(new_url) > 300:\n", + " new_url = None\n", + " x.img_url = new_url\n", + " x.save()" ] }, { diff --git a/set_env_variables.sh b/set_env_variables.sh index e0a3156..77758e8 100644 --- a/set_env_variables.sh +++ 
b/set_env_variables.sh @@ -1 +1 @@ -export $(grep -v '^#' .env | xargs) \ No newline at end of file +export $(grep -v '^#' .secret | xargs) \ No newline at end of file