From 14e06f5f6a9fc7117e7fa02f758b6d8b3120b33d Mon Sep 17 00:00:00 2001 From: csae8092 Date: Mon, 4 Nov 2024 15:01:03 +0100 Subject: [PATCH 1/4] PMB-import theatre and cinema events l#234 --- .gitignore | 1 + notebooks/issue__234_kino-events.ipynb | 313 +++++++++++++++++++++++++ set_env_variables.sh | 2 +- 3 files changed, 315 insertions(+), 1 deletion(-) create mode 100644 notebooks/issue__234_kino-events.ipynb diff --git a/.gitignore b/.gitignore index 97276f5..29c2ca2 100644 --- a/.gitignore +++ b/.gitignore @@ -194,3 +194,4 @@ brahms_pmb.csv hanslick-werke-wikidata.csv Untitled1.ipynb event-types.csv +events-pmb_ids.csv diff --git a/notebooks/issue__234_kino-events.ipynb b/notebooks/issue__234_kino-events.ipynb new file mode 100644 index 0000000..130db37 --- /dev/null +++ b/notebooks/issue__234_kino-events.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "080d64de-e40b-4aa3-94a0-adcc59c2c508", + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm import tqdm\n", + "from dumper.utils import gsheet_to_df\n", + "from apis_core.utils import get_object_from_pk_or_uri\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "aba2e0d2-933f-4ce2-9e05-5b7049e842b6", + "metadata": {}, + "outputs": [], + "source": [ + "col, _ = Collection.objects.get_or_create(name=\"Schnitzler-Theater\")\n", + "temp_col, _ = Collection.objects.get_or_create(name=\"temp-issue-227\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "69207701-41c4-4151-b36a-037613baa634", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "200\n" + ] + } + ], + "source": [ + "df = gsheet_to_df(\"1NAyAetEWR1RzyGcdTWc0KGK2ip19PpSyIOBBCOnyRnA\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb370aa1-33f6-4d1b-97d9-0c09f3fd8bb3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
start-dateend-datestart-date-writtenend-date-writtentitleeventType
01876-03-151876-03-151876-03-151876-03-15Aufführung von Das heiß Eysen; Die ehrlich Bäc...1200
11876-04-021876-04-021876-04-021876-04-02Gesellschaftskonzert, 2.4.18764302
21876-04-271876-04-271876-04-271876-04-27Aufführung von Götz von Berlichingen, 27.4.18761200
31876-05-081876-05-081876-05-081876-05-08Aufführung von Uriel Acosta. Trauerspiel in fü...1200
41876-06-291876-06-291876-06-291876-06-29Aufführung von Die Hugenotten, 29.6.18761200
.....................
39051931-10-091931-10-091931-10-091931-10-09Filmvorführung von Razzia auf Liebe, 9.10.19314234
39061931-10-111931-10-111931-10-111931-10-11Filmvorführung von Eva, 11.10.19314234
39071931-10-121931-10-121931-10-121931-10-12Filmvorführung von Purpur und Waschblau, 12.10...4234
39081931-10-161931-10-161931-10-161931-10-16Filmvorführung von Der König der Blitzer, 16.1...4234
39091931-10-191931-10-191931-10-191931-10-19Filmvorführung von Café Paradies, 19.10.19314234
\n", + "

3910 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " start-date end-date start-date-written end-date-written \\\n", + "0 1876-03-15 1876-03-15 1876-03-15 1876-03-15 \n", + "1 1876-04-02 1876-04-02 1876-04-02 1876-04-02 \n", + "2 1876-04-27 1876-04-27 1876-04-27 1876-04-27 \n", + "3 1876-05-08 1876-05-08 1876-05-08 1876-05-08 \n", + "4 1876-06-29 1876-06-29 1876-06-29 1876-06-29 \n", + "... ... ... ... ... \n", + "3905 1931-10-09 1931-10-09 1931-10-09 1931-10-09 \n", + "3906 1931-10-11 1931-10-11 1931-10-11 1931-10-11 \n", + "3907 1931-10-12 1931-10-12 1931-10-12 1931-10-12 \n", + "3908 1931-10-16 1931-10-16 1931-10-16 1931-10-16 \n", + "3909 1931-10-19 1931-10-19 1931-10-19 1931-10-19 \n", + "\n", + " title eventType \n", + "0 Aufführung von Das heiß Eysen; Die ehrlich Bäc... 1200 \n", + "1 Gesellschaftskonzert, 2.4.1876 4302 \n", + "2 Aufführung von Götz von Berlichingen, 27.4.1876 1200 \n", + "3 Aufführung von Uriel Acosta. Trauerspiel in fü... 1200 \n", + "4 Aufführung von Die Hugenotten, 29.6.1876 1200 \n", + "... ... ... \n", + "3905 Filmvorführung von Razzia auf Liebe, 9.10.1931 4234 \n", + "3906 Filmvorführung von Eva, 11.10.1931 4234 \n", + "3907 Filmvorführung von Purpur und Waschblau, 12.10... 4234 \n", + "3908 Filmvorführung von Der König der Blitzer, 16.1... 
4234 \n", + "3909 Filmvorführung von Café Paradies, 19.10.1931 4234 \n", + "\n", + "[3910 rows x 6 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "96d77edf-cca9-4d27-ac33-6d101865a9d0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████| 3910/3910 [12:54<00:00, 5.05it/s]\n" + ] + } + ], + "source": [ + "pmb_ids = []\n", + "for i, row in tqdm(df.iterrows(), total=len(df)):\n", + " event_type = EventType.objects.get(id=row[\"eventType\"].split(\" \")[0])\n", + " item, _ = Event.objects.get_or_create(\n", + " name=row[\"title\"],\n", + " start_date_written=row[\"start-date\"],\n", + " kind=event_type\n", + " )\n", + " item.collection.add(col)\n", + " item.collection.add(temp_col)\n", + " pmb_ids.append([i, f\"{item.start_date}\", item.name, item.id])\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "74861981-9b45-49af-a3e8-aeddd139b0b4", + "metadata": {}, + "outputs": [], + "source": [ + "new_df = pd.DataFrame(pmb_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f7329d46-ab6b-4ecc-a518-1872012576a4", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"pmb_ids\"] = new_df[3]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c81f0485-e6fc-477d-9cad-e75068a8d28f", + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(\"events-pmb_ids.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca6a59bb-c8a1-45ef-8797-ba1012c7bc9f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Django Shell-Plus", + "language": "python", + "name": "django_extensions" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/set_env_variables.sh b/set_env_variables.sh index e0a3156..77758e8 100644 --- a/set_env_variables.sh +++ b/set_env_variables.sh @@ -1 +1 @@ -export $(grep -v '^#' .env | xargs) \ No newline at end of file +export $(grep -v '^#' .secret | xargs) \ No newline at end of file From 3edecd695c1546d6a3ebfbac08f0d07c5def9197 Mon Sep 17 00:00:00 2001 From: csae8092 Date: Tue, 5 Nov 2024 08:21:35 +0100 Subject: [PATCH 2/4] kino korrekturen #237; ToDo: run against production [skip ci] --- issue__237-kino-corrections.ipynb | 131 ++++++++++++++++++++++++++++++ set_env_variables.sh | 2 +- 2 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 issue__237-kino-corrections.ipynb diff --git a/issue__237-kino-corrections.ipynb b/issue__237-kino-corrections.ipynb new file mode 100644 index 0000000..2d235c5 --- /dev/null +++ b/issue__237-kino-corrections.ipynb @@ -0,0 +1,131 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "350e5679-7ae5-4889-a1c3-7c091e57d0cc", + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm import tqdm\n", + "from dumper.utils import gsheet_to_df\n", + "from django.core.exceptions import ObjectDoesNotExist\n", + "from django.db import IntegrityError\n", + "from apis_core.utils import get_object_from_pk_or_uri\n", + "from AcdhArcheAssets.uri_norm_rules import get_normalized_uri\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4991d8c9-7686-454c-9b69-0ea07f1e3a50", + "metadata": {}, + "outputs": [], + "source": [ + "col, _ = Collection.objects.get_or_create(name=\"Schnitzler-Kino\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5ed45d6-f1fb-4886-ab2f-6f7555cdd351", + "metadata": {}, + 
"outputs": [], + "source": [ + "df = gsheet_to_df(\"1-Kl0pFsiABqzNZ65PoGp2SWrGadcRega-DIs_NxLcHE\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "828adcc8-bb37-4f5b-9432-a52d8558e7b5", + "metadata": {}, + "outputs": [], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c841018b-89fe-4a40-babe-bf6f19571b04", + "metadata": {}, + "outputs": [], + "source": [ + "for i, row in tqdm(df.iterrows(), total=len(df)):\n", + " film_id = row[\"film-id\"]\n", + " try:\n", + " item = Work.objects.get(id=film_id)\n", + " except ObjectDoesNotExist:\n", + " print(film_id)\n", + " name = row[\"titel-korrektur\"]\n", + " if isinstance(name, str):\n", + " item.name = name\n", + " try:\n", + " start_date = str(int(row[\"date-korrektur\"]))\n", + " item.start_date_written = start_date\n", + " item.end_date_written = start_date\n", + " except ValueError:\n", + " pass\n", + " if isinstance(row[\"wikidata-neu\"], str):\n", + " url = get_normalized_uri(row[\"wikidata-neu\"])\n", + " try:\n", + " uri = Uri.objects.get(\n", + " uri=url\n", + " )\n", + " except ObjectDoesNotExist:\n", + " uri = Uri.objects.create(\n", + " uri=url,\n", + " domain=\"wikidata\",\n", + " )\n", + " uri.entity = item\n", + " uri.save()\n", + " if isinstance(row[\"imdb-neu\"], str):\n", + " url = get_normalized_uri(row[\"imdb-neu\"])\n", + " try:\n", + " uri = Uri.objects.get(\n", + " uri=url\n", + " )\n", + " except ObjectDoesNotExist:\n", + " uri = Uri.objects.create(\n", + " uri=url,\n", + " domain=\"imdb\",\n", + " )\n", + " uri.entity = item\n", + " uri.save()\n", + " \n", + " item.save()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3775074-7267-443b-9f87-a61c8344783a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Django Shell-Plus", + "language": "python", + "name": "django_extensions" + }, + "language_info": { + "codemirror_mode": { + 
"name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/set_env_variables.sh b/set_env_variables.sh index 77758e8..e0a3156 100644 --- a/set_env_variables.sh +++ b/set_env_variables.sh @@ -1 +1 @@ -export $(grep -v '^#' .secret | xargs) \ No newline at end of file +export $(grep -v '^#' .env | xargs) \ No newline at end of file From e032b58be033156a8aa92aee6e15b0c70d8dc131 Mon Sep 17 00:00:00 2001 From: csae8092 Date: Tue, 5 Nov 2024 09:13:52 +0100 Subject: [PATCH 3/4] closes #238 and closes #237 [skip ci] --- .../issue__237-kino-corrections.ipynb | 1 + notebooks/issue__238-notes-for-kino.ipynb | 94 +++++++++++++++++++ 2 files changed, 95 insertions(+) rename issue__237-kino-corrections.ipynb => notebooks/issue__237-kino-corrections.ipynb (98%) create mode 100644 notebooks/issue__238-notes-for-kino.ipynb diff --git a/issue__237-kino-corrections.ipynb b/notebooks/issue__237-kino-corrections.ipynb similarity index 98% rename from issue__237-kino-corrections.ipynb rename to notebooks/issue__237-kino-corrections.ipynb index 2d235c5..aab9b48 100644 --- a/issue__237-kino-corrections.ipynb +++ b/notebooks/issue__237-kino-corrections.ipynb @@ -7,6 +7,7 @@ "metadata": {}, "outputs": [], "source": [ + "# run against production 2024-11-05\n", "from tqdm import tqdm\n", "from dumper.utils import gsheet_to_df\n", "from django.core.exceptions import ObjectDoesNotExist\n", diff --git a/notebooks/issue__238-notes-for-kino.ipynb b/notebooks/issue__238-notes-for-kino.ipynb new file mode 100644 index 0000000..7671ea1 --- /dev/null +++ b/notebooks/issue__238-notes-for-kino.ipynb @@ -0,0 +1,94 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "350e5679-7ae5-4889-a1c3-7c091e57d0cc", + "metadata": {}, + "outputs": [], + "source": [ + "# run against 
production 2024-11-05\n", + "from tqdm import tqdm\n", + "from dumper.utils import gsheet_to_df\n", + "from django.core.exceptions import ObjectDoesNotExist" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4991d8c9-7686-454c-9b69-0ea07f1e3a50", + "metadata": {}, + "outputs": [], + "source": [ + "col, _ = Collection.objects.get_or_create(name=\"Schnitzler-Kino\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5ed45d6-f1fb-4886-ab2f-6f7555cdd351", + "metadata": {}, + "outputs": [], + "source": [ + "df = gsheet_to_df(\"1mzXwjYjwKfvADur9hcrY6L1IPpMttEYOHgKFQP2kDTw\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "828adcc8-bb37-4f5b-9432-a52d8558e7b5", + "metadata": {}, + "outputs": [], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c841018b-89fe-4a40-babe-bf6f19571b04", + "metadata": {}, + "outputs": [], + "source": [ + "for i, row in tqdm(df.iterrows(), total=len(df)):\n", + " entity_id = row[\"source_id\"]\n", + " try:\n", + " item = Event.objects.get(id=entity_id)\n", + " except ObjectDoesNotExist:\n", + " print(entity_id)\n", + "        continue  # Event not found: skip the row so the note is not written to the item of an earlier row\n", + " item.notes = row[\"note\"]\n", + " item.save()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3775074-7267-443b-9f87-a61c8344783a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Django Shell-Plus", + "language": "python", + "name": "django_extensions" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From dfccc805e83ab1dc41326a945a6507c0e7573cc9 Mon Sep 17 00:00:00 2001 From: csae8092 Date: Tue, 5 Nov 2024 10:53:26 +0100 Subject: [PATCH 
4/4] reduces gunicorn workers --- start-server.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/start-server.sh b/start-server.sh index 3e3762e..32cbb10 100755 --- a/start-server.sh +++ b/start-server.sh @@ -9,4 +9,4 @@ python manage.py collectstatic --no-input ./download_files.sh python manage.py find_duplicated_persons python manage.py find_duplicated_places -gunicorn pmb.wsgi --user www-data --bind 0.0.0.0:8010 --workers 3 --timeout 600 & nginx -g "daemon off;" \ No newline at end of file +gunicorn pmb.wsgi --user www-data --bind 0.0.0.0:8010 --workers 1 --timeout 600 & nginx -g "daemon off;" \ No newline at end of file