-
Notifications
You must be signed in to change notification settings - Fork 0
/
consolidate_data.py
86 lines (71 loc) · 2.78 KB
/
consolidate_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import csv
import os
import glob
import re
def read_timestep(root_path: str, time: str):
    """
    Read one timestep of a population from file and return the relationship
    rows and the agent rows as lists of dictionaries.

    The folder ``root_path/time`` must contain at least one ``*_agents.csv``
    and one ``*_relationships.csv`` file (the first match of each is used).
    Any ``*_feat_<name>.csv`` and ``*_exposure_<name>.csv`` files in the folder
    are merged into the agent rows: each of their columns (other than
    ``agent``) is stored on the matching agent as ``<name>_<column>``.

    args:
        root_path: the directory containing all the timestep folders
        time: which timestep (folder) to read; also stored on every returned
            row under the "time" key

    returns:
        a tuple ``(rels, agents)`` - the relationship rows and the agent rows,
        each a list of dictionaries of strings as read by ``csv.DictReader``

    raises:
        FileNotFoundError: if the folder has no agents or relationships file
        KeyError: if a feature/exposure row refers to an agent id that is not
            present in the agents file
    """
    path = os.path.join(root_path, time)

    def find_required(suffix):
        # glob returns an empty list when nothing matches, so check before
        # indexing and report the folder that was searched.
        pattern = os.path.join(path, f"*_{suffix}.csv")
        matches = [m for m in glob.glob(pattern) if os.path.isfile(m)]
        if not matches:
            raise FileNotFoundError(f"can't find {suffix}.csv in {path}")
        return matches[0]

    agent_file = find_required("agents")
    rel_file = find_required("relationships")
    feat_files = glob.glob(os.path.join(path, "*_feat_*.csv"))
    exposure_files = glob.glob(os.path.join(path, "*_exposure_*.csv"))

    # agent rows keyed by id so the extras files can be merged in by agent
    agents = {}
    with open(agent_file, newline="") as f:
        for row in csv.DictReader(f):
            row["time"] = time
            agents[row["id"]] = row

    def update_agent_extras(files, extra_type):
        # the text between "_<extra_type>_" and ".csv" names the extra and
        # becomes the prefix of every column copied onto the agent
        pattern = re.compile(rf"^.*_{extra_type}_(.*)\.csv$")
        for file in files:
            m = pattern.match(file)
            if m is None:
                continue

            extra = m.group(1)
            with open(file, newline="") as f:
                for row in csv.DictReader(f):
                    agent = agents[row["agent"]]
                    for k, v in row.items():
                        if k == "agent":
                            continue
                        # lower-case the literal strings "True"/"False";
                        # every other value is copied unchanged
                        agent[f"{extra}_{k}"] = (
                            v.lower() if v in ("True", "False") else v
                        )

    update_agent_extras(feat_files, "feat")
    update_agent_extras(exposure_files, "exposure")

    # relationship rows, each tagged with the timestep they came from
    rels = []
    with open(rel_file, newline="") as f:
        for row in csv.DictReader(f):
            row["time"] = time
            rels.append(row)

    return rels, list(agents.values())
def _collect_fieldnames(rows):
    """Return every key used by any row in *rows*, in first-seen order."""
    # A dict keeps insertion order, so it doubles as an ordered set here.
    names = {}
    for row in rows:
        for key in row:
            names.setdefault(key)
    return list(names)


def _write_rows(filename, rows):
    """Write *rows* (a list of dictionaries) to *filename* as CSV with a header."""
    with open(filename, "w", newline="") as f:
        # Use the union of all keys rather than one row's keys: DictWriter
        # raises ValueError on a row with a key missing from fieldnames, and
        # rows from different timesteps may not share the same columns.
        # Keys a row lacks are written as empty cells.
        writer = csv.DictWriter(f, fieldnames=_collect_fieldnames(rows))
        writer.writeheader()
        writer.writerows(rows)


agent_dicts = []
rel_dicts = []
resultdir = "results"

# Every sub-folder of the results directory is treated as one timestep.
# os.listdir order is arbitrary, so sort to make the output reproducible.
for path in sorted(os.listdir(resultdir)):
    if os.path.isdir(os.path.join(resultdir, path)):
        rels, agents = read_timestep(resultdir, path)
        rel_dicts.extend(rels)
        agent_dicts.extend(agents)

print("Creating rel file")
_write_rows("rels.csv", rel_dicts)

print("Creating agent file")
_write_rows("agents.csv", agent_dicts)