Skip to content

Commit

Permalink
Merge pull request #158 from arthur-schnitzler/157-serialize-relations-as-network-graph
Browse files Browse the repository at this point in the history

157 serialize relations as network graph
  • Loading branch information
csae8092 authored Feb 26, 2024
2 parents c96455d + 5a90b68 commit cf775de
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 14 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,7 @@ media/duplicated_*.csv
Untitled.ipynb
listevent.xml
relations.csv
hansi.*
media/relations.gexf
edges.csv
nodes.csv
30 changes: 30 additions & 0 deletions apis_core/apis_entities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,8 @@ def save(self, *args, **kwargs):
return self

class Meta:
verbose_name = "Person"
verbose_name_plural = "Personen"
ordering = [
"id",
]
Expand All @@ -526,6 +528,10 @@ def get_api_url(self):
def get_icon(self):
    """Return the icon identifier string for this entity.

    NOTE(review): the value looks like a list of CSS class names
    (icon font plus a project-specific class) -- confirm against the
    templates that consume it.
    """
    icon_classes = "bi bi-people apis-person"
    return icon_classes

@classmethod
def get_color(cls):
    """Return the hex color string associated with this entity class.

    The value does not depend on instance state, so the method is a
    classmethod; it can be called on the class or on an instance alike.

    Returns:
        str: a ``#rrggbb`` color code.
    """
    # The implicit first argument of a classmethod is the class, so it is
    # named ``cls`` rather than ``self``.
    return "#720e07"


class Place(AbstractEntity):
kind = models.ForeignKey(
Expand All @@ -541,6 +547,8 @@ def save(self, *args, **kwargs):
return self

class Meta:
verbose_name = "Ort"
verbose_name_plural = "Orte"
ordering = [
"id",
]
Expand All @@ -555,13 +563,19 @@ def get_api_url(self):
def get_icon(self):
    """Return the icon identifier string for this entity.

    NOTE(review): the value looks like a list of CSS class names
    (icon font plus a project-specific class) -- confirm against the
    templates that consume it.
    """
    icon_classes = "bi bi-map apis-place"
    return icon_classes

@classmethod
def get_color(cls):
    """Return the hex color string associated with this entity class.

    The value does not depend on instance state, so the method is a
    classmethod; it can be called on the class or on an instance alike.

    Returns:
        str: a ``#rrggbb`` color code.
    """
    # The implicit first argument of a classmethod is the class, so it is
    # named ``cls`` rather than ``self``.
    return "#5bc0eb"


class Institution(AbstractEntity):
kind = models.ForeignKey(
InstitutionType, blank=True, null=True, on_delete=models.SET_NULL
)

class Meta:
verbose_name = "Institution"
verbose_name_plural = "Institutionen"
ordering = [
"id",
]
Expand All @@ -576,13 +590,19 @@ def get_api_url(self):
def get_icon(self):
    """Return the icon identifier string for this entity.

    NOTE(review): the value looks like a list of CSS class names
    (icon font plus a project-specific class) -- confirm against the
    templates that consume it.
    """
    icon_classes = "bi bi-building-gear apis-institution"
    return icon_classes

@classmethod
def get_color(cls):
    """Return the hex color string associated with this entity class.

    The value does not depend on instance state, so the method is a
    classmethod; it can be called on the class or on an instance alike.

    Returns:
        str: a ``#rrggbb`` color code.
    """
    # The implicit first argument of a classmethod is the class, so it is
    # named ``cls`` rather than ``self``.
    return "#1d3461"


class Event(AbstractEntity):
kind = models.ForeignKey(
EventType, blank=True, null=True, on_delete=models.SET_NULL
)

class Meta:
verbose_name = "Ereignis"
verbose_name_plural = "Ereignisse"
ordering = [
"id",
]
Expand All @@ -597,11 +617,17 @@ def get_api_url(self):
def get_icon(self):
    """Return the icon identifier string for this entity.

    NOTE(review): the value looks like a list of CSS class names
    (icon font plus a project-specific class) -- confirm against the
    templates that consume it.
    """
    icon_classes = "bi bi-calendar3 apis-event"
    return icon_classes

@classmethod
def get_color(cls):
    """Return the hex color string associated with this entity class.

    The value does not depend on instance state, so the method is a
    classmethod; it can be called on the class or on an instance alike.

    Returns:
        str: a ``#rrggbb`` color code.
    """
    # The implicit first argument of a classmethod is the class, so it is
    # named ``cls`` rather than ``self``.
    return "#9bc53d"


class Work(AbstractEntity):
kind = models.ForeignKey(WorkType, blank=True, null=True, on_delete=models.SET_NULL)

class Meta:
verbose_name = "Werk"
verbose_name_plural = "Werke"
ordering = [
"id",
]
Expand All @@ -616,6 +642,10 @@ def get_api_url(self):
def get_icon(self):
    """Return the icon identifier string for this entity.

    NOTE(review): the value looks like a list of CSS class names
    (icon font plus a project-specific class) -- confirm against the
    templates that consume it.
    """
    icon_classes = "bi bi-book apis-work"
    return icon_classes

@classmethod
def get_color(cls):
    """Return the hex color string associated with this entity class.

    The value does not depend on instance state, so the method is a
    classmethod; it can be called on the class or on an instance alike.

    Returns:
        str: a ``#rrggbb`` color code.
    """
    # The implicit first argument of a classmethod is the class, so it is
    # named ``cls`` rather than ``self``.
    return "#ff8600"


a_ents = getattr(settings, "APIS_ADDITIONAL_ENTITIES", False)

Expand Down
99 changes: 91 additions & 8 deletions apis_core/apis_relations/management/commands/dump_relations.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import os
import pandas as pd
import networkx as nx
import recordlinkage
from tqdm import tqdm

from datetime import datetime

from django.conf import settings
from django.core.management.base import BaseCommand
from icecream import ic
from tqdm import tqdm
from typing import Any
from apis_core.apis_relations.models import AbstractRelation
from dumper.utils import upload_files_to_owncloud, write_report
Expand Down Expand Up @@ -42,7 +43,13 @@ def handle(self, *args: Any, **options: Any) -> str | None:
df.set_index("relation_pk", inplace=True, drop=False)
indexer = recordlinkage.Index()
indexer.block(
["relation_type", "source_id", "target_id", "start_date", "end_date"]
[
"relation_type",
"source_id",
"target_id",
"relation_start_date_written",
"relation_end_date_written",
]
)
duplicates = indexer.index(df)
print(f"deleting {len(duplicates)} duplicated relations")
Expand All @@ -59,16 +66,92 @@ def handle(self, *args: Any, **options: Any) -> str | None:
break
print(deleted)
df.drop(deleted)
save_path = os.path.join(settings.MEDIA_ROOT, "relations.csv")
df.to_csv(save_path, index=False)
end_time = datetime.now().strftime(settings.PMB_TIME_PATTERN)
report = [os.path.basename(__file__), start_time, end_time]
write_report(report)
relations_csv = os.path.join(settings.MEDIA_ROOT, "relations.csv")
df.to_csv(relations_csv, index=False)
print(f"serialized {len(df)} relations")
files = list()
files.append(save_path)
files.append(relations_csv)

print("and now serialize relations as network graph")
G = nx.Graph()
nodes = {}
edges = []
edges_labels = ["source", "target", "type", "label", "date"]
for i, row in tqdm(df.iterrows(), total=len(df)):
source_node = {
"id": row["source_id"],
"label": row["source"],
"type": row["source_type"],
"color": row["source_color"],
"start_date": row["source_start_date"],
"start_date_written": row["source_start_date_written"],
}
nodes[row["source_id"]] = source_node
G.add_nodes_from([(row["source_id"], source_node)])
target_node = {
"id": row["target_id"],
"label": row["target"],
"type": row["target_type"],
"color": row["target_color"],
"date": row["target_start_date"],
"start_date_written": row["target_start_date_written"],
}
nodes[row["target_id"]] = target_node
G.add_nodes_from([(row["target_id"], target_node)])
G.add_edges_from(
[
(
row["source_id"],
row["target_id"],
{
"relation_class": row["relation_class"],
"label": row["relation_type"],
"id": row["relation_pk"],
"start_date": row["relation_start_date"],
"start_date_written": row["relation_start_date_written"],
"end_date": row["relation_end_date"],
"end_date_written": row["relation_end_date_written"],
},
)
]
)
edges.append(
[
row["source_id"],
row["target_id"],
row["relation_class"],
row["relation_type"],
row["relation_start_date"],
]
)
gexf_file = os.path.join(settings.MEDIA_ROOT, "relations.gexf")
nx.write_gexf(G, gexf_file)
print(f"serialized {len(df)} relations")
files.append(gexf_file)

ndf = pd.DataFrame(edges, columns=edges_labels)
edges_file = os.path.join(settings.MEDIA_ROOT, "edges.csv")
ndf.to_csv(edges_file, index=False)
files.append(edges_file)

data = []
for key, value in nodes.items():
data.append(value)

df = pd.DataFrame(data)
nodes_file = os.path.join(settings.MEDIA_ROOT, "nodes.csv")
df.to_csv(nodes_file, index=False)
files.append(nodes_file)
ic(files)

try:
upload_files_to_owncloud(files)
for x in files:
print(f"uploading {x} to owncloud")
except Exception as e:
ic(e)

end_time = datetime.now().strftime(settings.PMB_TIME_PATTERN)
report = [os.path.basename(__file__), start_time, end_time]
write_report(report)
return "done"
20 changes: 16 additions & 4 deletions apis_core/apis_relations/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,24 @@ def get_web_object(self):
result = {
"relation_pk": self.pk,
"relation_type": self.relation_type.name,
"source": namea,
"target": nameb,
"relation_class": f"{namea._meta.verbose_name} -> {nameb._meta.verbose_name}",
"relation_name": self.__str__(),
"relation_start_date": f"{self.start_date}",
"relation_end_date": f"{self.end_date}",
"relation_start_date_written": f"{self.start_date_written}",
"relation_end_date_written": f"{self.end_date_written}",
"source": namea.__str__(),
"source_id": namea.id,
"source_type": namea._meta.verbose_name,
"source_start_date": f"{namea.start_date}",
"source_start_date_written": f"{namea.start_date_written}",
"source_color": namea.get_color(),
"target": nameb.__str__(),
"target_id": nameb.id,
"start_date": self.start_date_written,
"end_date": self.end_date_written,
"target_type": nameb._meta.verbose_name,
"target_start_date": f"{nameb.start_date}",
"target_start_date_written": f"{nameb.start_date_written}",
"target_color": nameb.get_color(),
}
return result

Expand Down
1 change: 0 additions & 1 deletion apis_core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from django.shortcuts import get_object_or_404

# from apis_core.apis_entities.detail_views import get_object_from_pk_or_uri
from apis_core.apis_metainfo.models import TempEntityClass, Uri


Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ acdh-id-reconciler>=0.2,<1
acdh-tei-pyutils>=1.1,<2
acdh-wikidata-pyutils==1.0
apis-override-select2js==0.1
Django>4.1,<6
Django>=5.0,<6
django-admin-csvexport
django-autocomplete-light
django-crispy-forms
Expand All @@ -16,6 +16,7 @@ pandas
pylobid
psycopg2
pyocclient==0.6
networkx>=3.2.1,<4
icecream
flake8
black
Expand Down

0 comments on commit cf775de

Please sign in to comment.