-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path04_complex_kge_training.py
128 lines (106 loc) · 3.96 KB
/
04_complex_kge_training.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# Script to compute ComplEx embeddings for the entities and relations of the linkedpaperswithcode dataset.
import torch
from torch_geometric.data import Dataset, download_url, Data
from torch_geometric.transforms import RandomLinkSplit
from torch_geometric import seed_everything
from torch_geometric.nn import ComplEx
import torch
import torch.optim as optim
import time
# Path to the output evaluation file (training log and final test metrics).
SUMMARY_PATH = ".../complex-kge-summary.txt"
# Path to the input dataset: one whitespace-separated "head relation tail"
# triple of integer ids per line.
TRIPLES_PATH = '.../triples.txt'
# Number of training epochs.
NUM_EPOCHS = 900
# Evaluate on the validation split every VAL_EVERY epochs (e.g. 300, for
# early stopping). 0 disables validation during training.
VAL_EVERY = 0


def read_triples(path):
    """Read integer (head, relation, tail) triples from a text file.

    Each non-blank line must hold at least three whitespace-separated
    integers; only the first three are used.

    Args:
        path: Location of the triples file.

    Returns:
        A tuple ``(heads, relations, tails)`` of 1-D integer tensors of equal
        length.

    Raises:
        ValueError: If a non-blank line has fewer than three fields, a field
            is not an integer, or the file contains no triples.
    """
    heads, relations, tails = [], [], []
    with open(path, 'r', encoding='utf-8') as file:
        for line_no, line in enumerate(file, start=1):
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            if len(fields) < 3:
                raise ValueError(
                    f'{path}:{line_no}: expected 3 fields, got {len(fields)}'
                )
            heads.append(int(fields[0]))
            relations.append(int(fields[1]))
            tails.append(int(fields[2]))
    if not heads:
        raise ValueError(f'{path}: no triples found')
    # Convert the lists into PyTorch tensors.
    return torch.tensor(heads), torch.tensor(relations), torch.tensor(tails)


def split_triples(data, num_val=0.1, num_test=0.1):
    """Randomly partition the triples of ``data`` into disjoint splits.

    ``RandomLinkSplit`` is deliberately not used here: per its documentation,
    the ``edge_index`` of the validation/test objects it returns holds the
    message-passing edges (train, resp. train + validation), while the
    held-out edges are stored in ``edge_label_index``. Evaluating on
    ``edge_index`` would therefore score triples seen during training.

    Args:
        data: ``Data`` object with ``edge_index``, ``edge_type`` and
            ``num_nodes``.
        num_val: Fraction of triples assigned to the validation split.
        num_test: Fraction of triples assigned to the test split.

    Returns:
        ``(train_data, val_data, test_data)``, each a ``Data`` object with
        ``edge_index``, ``edge_type`` and the same ``num_nodes`` as ``data``.
    """
    num_triples = data.edge_index.size(1)
    n_val = int(num_val * num_triples)
    n_test = int(num_test * num_triples)
    n_train = num_triples - n_val - n_test
    # Uses the global torch RNG, so seed_everything() makes it reproducible.
    perm = torch.randperm(num_triples)
    return tuple(
        Data(
            edge_index=data.edge_index[:, index],
            edge_type=data.edge_type[index],
            num_nodes=data.num_nodes,
        )
        for index in perm.split([n_train, n_val, n_test])
    )


def train():
    """Run one training epoch and return the mean loss per triple.

    Uses the module-level ``model``, ``loader`` and ``optimizer``.
    """
    model.train()
    total_loss = total_examples = 0
    for head_index, rel_type, tail_index in loader:
        optimizer.zero_grad()
        loss = model.loss(head_index, rel_type, tail_index)
        loss.backward()
        optimizer.step()
        # Weight each batch loss by its size so the epoch mean is per triple.
        total_loss += float(loss) * head_index.numel()
        total_examples += head_index.numel()
    return total_loss / total_examples


@torch.no_grad()
def test(val_data, k):
    """Evaluate the module-level ``model`` on the triples of ``val_data``.

    Args:
        val_data: ``Data`` object whose ``edge_index`` / ``edge_type`` hold
            the triples to rank.
        k: Cut-off for the Hits@k metric.

    Returns:
        The tuple returned by ``model.test``. Its first element is the mean
        rank and its last element is Hits@k; some torch_geometric versions
        return further metrics in between.
    """
    model.eval()
    head_index = val_data.edge_index[0].to(device)
    rel_type = val_data.edge_type.to(device)
    tail_index = val_data.edge_index[1].to(device)
    return model.test(
        head_index=head_index,
        rel_type=rel_type,
        tail_index=tail_index,
        batch_size=20000,
        k=k,
    )


with open(SUMMARY_PATH, "w", encoding="utf-8") as z:
    start_time = time.ctime()
    z.write(f'Calculate embeddings with ComplEx started at: {start_time}\n')
    z.flush()
    print('Calculate embeddings with ComplEx started at: ', start_time)

    first_tensor, second_tensor, third_tensor = read_triples(TRIPLES_PATH)
    # Combine heads and tails into one 2 x num_triples edge index.
    combined_tensor = torch.stack((first_tensor, third_tensor))
    data = Data(
        edge_index=combined_tensor,
        edge_type=second_tensor,
        num_nodes=combined_tensor.max().item() + 1,
    )

    seed_everything(1)
    train_data, val_data, test_data = split_triples(
        data,
        num_val=0.1,
        num_test=0.1,
    )

    # Fall back to the CPU so the script still runs on machines without CUDA.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    z.write(f'Using device: {device}\n')
    z.flush()
    print('Using device:', device)

    model = ComplEx(
        num_nodes=data.num_nodes,
        # Count relations on the full data so relation ids that only occur in
        # the validation/test split still get an embedding.
        num_relations=data.num_edge_types,
        hidden_channels=256,
    ).to(device)

    loader = model.loader(
        head_index=train_data.edge_index[0].to(device),
        rel_type=train_data.edge_type.to(device),
        tail_index=train_data.edge_index[1].to(device),
        batch_size=2000,
        shuffle=True,
    )

    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-6)

    for epoch in range(1, NUM_EPOCHS + 1):
        loss = train()
        z.write(f'Epoch: {epoch:03d}, Loss: {loss:.4f}\n')
        # Flush every epoch so progress can be followed while training runs.
        z.flush()
        if VAL_EVERY and epoch % VAL_EVERY == 0:
            # First element is the mean rank, last is Hits@k.
            rank, *_, hits = test(val_data, k=10)
            z.write(f'Epoch: {epoch:03d}, Val Mean Rank: {rank:.2f}, Val Hits@10: {hits:.4f}\n')
            z.flush()

    k_values = [1, 3, 10]
    results = {k: test(test_data, k) for k in k_values}

    for k in k_values:
        # Star-unpack so both 2-tuple and longer results are handled.
        rank, *_, hits_at_k = results[k]
        z.write(f'Test Mean Rank: {rank:.2f}, Test Hits@{k}: {hits_at_k:.4f}\n')
        z.flush()

    end_time = time.ctime()
    z.write(f'Calculate embeddings with ComplEx end at: {end_time}\n')
    z.flush()
    print('Calculate embeddings with ComplEx end at: ', end_time)

print('Done')