From 47810da3ec50b0354b09e753aa4adb6cad3d7bdf Mon Sep 17 00:00:00 2001
From: Birger Schacht
Date: Thu, 28 Sep 2023 11:28:39 +0200
Subject: [PATCH] feat: add `Source` model and import logic

Add a `Source` model (plus migration 0005) to `apis_ontology` and an
`import_sources()` helper to the `import` management command.

`import_sources()` requests `{SRC}/metainfo/source/` as JSON and keeps
following the `next` link of each response until it is empty. For every
entry in `results` it gets or creates the `Source` with the same `id`,
copies each attribute of the entry that the model instance also has, and
saves the instance.

The command gains a `--sources` flag; `options["sources"]` is also set to
True in the same place where the entities/urls/relations options are
forced on.

`Source.__str__` returns "<orig_filename>, stored by <author>" when both
fields are set and "(ID: <id>)" otherwise.
---
 apis_ontology/management/commands/import.py | 23 +++++++++++++++++++-
 apis_ontology/migrations/0005_source.py     | 24 +++++++++++++++++++++
 apis_ontology/models.py                     | 13 +++++++++++
 3 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 apis_ontology/migrations/0005_source.py

diff --git a/apis_ontology/management/commands/import.py b/apis_ontology/management/commands/import.py
index 7ce6c20..b851f04 100644
--- a/apis_ontology/management/commands/import.py
+++ b/apis_ontology/management/commands/import.py
@@ -2,13 +2,29 @@
 
 from django.core.management.base import BaseCommand
 
-from apis_ontology.models import Event, Institution, Person, Place, Work, Title, Profession
+from apis_ontology.models import Event, Institution, Person, Place, Work, Title, Profession, Source
 from apis_core.apis_metainfo.models import Uri, RootObject
 from apis_core.apis_relations.models import Property, TempTriple
 
 SRC="https://apis.acdh.oeaw.ac.at/apis/api"
 
 
+def import_sources():
+    nextpage = f"{SRC}/metainfo/source/?format=json&limit=1000"
+    while nextpage:
+        print(nextpage)
+        page = requests.get(nextpage)
+        data = page.json()
+        nextpage = data['next']
+        for result in data["results"]:
+            print(result["url"])
+            newsource, created = Source.objects.get_or_create(id=result["id"])
+            for attribute in result:
+                if hasattr(newsource, attribute):
+                    setattr(newsource, attribute, result[attribute])
+            newsource.save()
+
+
 class Command(BaseCommand):
     help = "Import data from legacy APIS instance"
 
@@ -16,6 +32,7 @@ def add_arguments(self, parser):
         parser.add_argument("--entities", action="store_true")
         parser.add_argument("--urls", action="store_true")
         parser.add_argument("--relations", action="store_true")
+        parser.add_argument("--sources", action="store_true")
         parser.add_argument("--all")
 
 
@@ -24,6 +41,10 @@ def handle(self, *args, **options):
             options["entities"] = True
             options["urls"] = True
             options["relations"] = True
+            options["sources"] = True
+
+        if options["sources"]:
+            import_sources()
 
         entities = {
             "event": {
diff --git a/apis_ontology/migrations/0005_source.py b/apis_ontology/migrations/0005_source.py
new file mode 100644
index 0000000..e7262d0
--- /dev/null
+++ b/apis_ontology/migrations/0005_source.py
@@ -0,0 +1,24 @@
+# Generated by Django 4.1.11 on 2023-09-28 04:26
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('apis_ontology', '0004_remove_person_profession_profession_and_more'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='Source',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('orig_filename', models.CharField(blank=True, max_length=255)),
+                ('indexed', models.BooleanField(default=False)),
+                ('pubinfo', models.CharField(blank=True, max_length=400)),
+                ('author', models.CharField(blank=True, max_length=255)),
+                ('orig_id', models.PositiveIntegerField(blank=True, null=True)),
+            ],
+        ),
+    ]
diff --git a/apis_ontology/models.py b/apis_ontology/models.py
index 5fc32e4..d9252a6 100644
--- a/apis_ontology/models.py
+++ b/apis_ontology/models.py
@@ -16,6 +16,19 @@ class Meta:
         abstract = True
 
 
+class Source(models.Model):
+    orig_filename = models.CharField(max_length=255, blank=True)
+    indexed = models.BooleanField(default=False)
+    pubinfo = models.CharField(max_length=400, blank=True)
+    author = models.CharField(max_length=255, blank=True)
+    orig_id = models.PositiveIntegerField(blank=True, null=True)
+
+    def __str__(self):
+        if self.author and self.orig_filename:
+            return f"{self.orig_filename}, stored by {self.author}"
+        return f"(ID: {self.id})"
+
+
 class Title(models.Model):
     name = models.CharField(max_length=255, blank=True)
 