-
Notifications
You must be signed in to change notification settings - Fork 0
/
make_storymap_data.py
98 lines (89 loc) · 3.19 KB
/
make_storymap_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import json
from acdh_tei_pyutils.tei import TeiReader
from tqdm import tqdm
from config import MASTER_ENRICHED, FILTER_WORDS
# TEI source handed to TeiReader; the value comes from the project's config module.
main_file = MASTER_ENRICHED

# Year bounds for the export. They are used as ``range(START_YEAR, END_YEAR)``,
# so END_YEAR itself is not processed.
START_YEAR = 1879
END_YEAR = 1932

# Prefix -> namespace URI mapping passed to the XPath calls in this script.
ns = {"tei": "http://www.tei-c.org/ns/1.0"}
def get_name(node):
    """Return the place name of *node* with its whitespace normalised.

    The first text node matched by ``./tei:placeName/text()`` is split on
    whitespace and re-joined with single spaces, which also strips leading and
    trailing whitespace.

    Raises:
        IndexError: if the XPath query matches nothing.
    """
    name_texts = node.xpath("./tei:placeName/text()", namespaces=ns)
    first_text = name_texts[0]
    words = first_text.split()
    return " ".join(words)
# Parse the TEI source and collect every <place> element it contains.
doc = TeiReader(main_file)
all_places = doc.any_xpath(".//tei:place")

# Keep only the places whose lower-cased name contains none of FILTER_WORDS.
places = []
for candidate in all_places:
    lowered_name = get_name(candidate).lower()
    if any(word in lowered_name for word in FILTER_WORDS):
        continue
    places.append(candidate)

no_match = set()
def _build_slide(place, name, date):
    """Build the StoryMapJS slide dict for a single place.

    Args:
        place: the ``tei:place`` element to describe.
        name: display name of the place (headline and media caption).
        date: ``when`` attribute value of the event the place belongs to.

    Returns:
        The slide dict, or ``None`` when the place has no ``tei:geo`` text or
        when that text does not start with two parseable numbers. In both
        cases a short diagnostic line is printed.
    """
    geo_texts = place.xpath(".//tei:geo/text()", namespaces=ns)
    if not geo_texts:
        # XPath positions are 1-based, so a ``tei:geo[0]`` lookup can never
        # match; report the missing coordinates explicitly instead.
        print(date, name, "no tei:geo coordinates")
        return None
    coords = geo_texts[0]
    try:
        # Unpacking raises ValueError for fewer than two tokens, float() for
        # non-numeric ones; either way the entry is skipped, not fatal.
        lat, lon = (float(part) for part in coords.split()[:2])
    except ValueError:
        print(date, name, coords)
        return None

    # Keys are inserted in a fixed order (text, media, location, date) so the
    # serialised JSON keeps a stable layout.
    slide = {
        "text": {
            "headline": name,
            "text": f"Schnitzler war am {date} in {name}",
        }
    }
    links = place.xpath(".//tei:link", namespaces=ns)
    if links:
        target = links[0].attrib["target"]
        slide["media"] = {
            "caption": f"Postkarte von {name}",
            "credit": "ÖNB",
            "url": f"{target}",
        }
    slide["location"] = {
        "lat": lat,
        "line": True,
        "lon": lon,
        "zoom": 12,
    }
    slide["date"] = date
    return slide


# Neither the event date nor the name of a place depends on the year being
# exported, so resolve both once instead of once per place *per year*.
place_dates = [
    place.xpath(".//ancestor::tei:event[1]", namespaces=ns)[0].attrib["when"]
    for place in places
]
place_names = [get_name(place) for place in places]
last_index = len(places) - 1

# Write one StoryMapJS JSON file per year; tqdm takes the total from the range.
for year in tqdm(range(START_YEAR, END_YEAR)):
    slides = []
    for i, place in enumerate(places):
        date = place_dates[i]
        if f"{year}" not in date:
            continue
        name = place_names[i]
        # A run of consecutive entries with the same name is collapsed into
        # its final entry. The very last place has no successor and is
        # therefore always kept.
        if i < last_index and place_names[i + 1] == name:
            continue
        slide = _build_slide(place, name, date)
        if slide is not None:
            slides.append(slide)

    story_map_data = {
        "storymap": {
            "call_to_action": True,
            "call_to_action_text": "",
            "map_as_image": False,
            "zoomify": True,
            "map_type": "",
            "map_subdomains": "",
            "attribution": "",
            "slides": slides,
        }
    }
    with open(f"./html/data/{year}.json", "w", encoding="utf-8") as f:
        json.dump(story_map_data, f)