# config_model.py
import copy
import texar.torch as tx
random_seed = 1234
lr = 2e-7                  # base learning rate
word_dim = 768             # BERT hidden size
max_position_length = 1024
hidden_dim = word_dim
moco_t = 0.3               # temperature for the MoCo-style contrastive logits
moco_m = 0.9               # momentum for the key-encoder EMA update
sinkhorn_iterations = 3    # Sinkhorn-Knopp normalization steps
epsilon = 0.05             # entropy regularization for Sinkhorn
mem_lr = 5e-4              # learning rate for the memory module
mem_t = 0.3                # temperature for the memory module
bank_size = 10             # memory bank size
num_topics = 50            # number of topics / prototypes
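
# ---------------------------------------------------------------------------
# Hedged sketch, not from the original file: the names above follow MoCo /
# SwAV conventions, so presumably `moco_m` drives an EMA update of a key
# encoder, `moco_t` scales contrastive logits, and `sinkhorn_iterations` /
# `epsilon` parameterize Sinkhorn-Knopp normalization of soft topic
# assignments. The helpers below are generic illustrations of those roles,
# not code recovered from this repository.
import torch


@torch.no_grad()
def momentum_update(query_encoder, key_encoder, m=moco_m):
    """MoCo-style EMA illustration: key = m * key + (1 - m) * query."""
    for q_param, k_param in zip(query_encoder.parameters(),
                                key_encoder.parameters()):
        k_param.data.mul_(m).add_(q_param.data, alpha=1.0 - m)


@torch.no_grad()
def sinkhorn(scores, eps=epsilon, n_iters=sinkhorn_iterations):
    """SwAV-style Sinkhorn-Knopp over a (batch, num_topics) score matrix."""
    q = torch.exp(scores / eps).t()       # (num_topics, batch)
    q /= q.sum()
    n_rows, n_cols = q.shape
    for _ in range(n_iters):
        q /= q.sum(dim=1, keepdim=True)   # normalize over the batch
        q /= n_rows
        q /= q.sum(dim=0, keepdim=True)   # normalize over topics
        q /= n_cols
    return (q * n_cols).t()               # each row sums to 1
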
# Hyperparameters for a trainable embedding lookup table.
emb = {
    "name": "lookup_table",
    "dim": word_dim,
    "initializer": {
        "type": "normal_",
        "kwargs": {"mean": 0.0, "std": word_dim ** -0.5},
    },
    "trainable": True,
}
position_embedder_hparams = {"dim": word_dim}
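
# Hedged usage sketch (an assumption, not code from this repo): `emb` and
# `position_embedder_hparams` follow the hparams layout of texar.torch's
# WordEmbedder / PositionEmbedder, e.g.
#
#     word_embedder = tx.modules.WordEmbedder(
#         vocab_size=30522, hparams=emb)   # 30522 = BERT vocab size below
#     pos_embedder = tx.modules.PositionEmbedder(
#         position_size=max_position_length,
#         hparams=position_embedder_hparams)
#
# Whether the "trainable" key is accepted may depend on the installed texar
# version; in PyTorch, trainability is normally controlled via requires_grad.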
# Hyperparameters for the BERT encoder; the values below match
# bert-base-uncased (12 layers, 12 heads, hidden size 768).
encoder = {
    "pretrained_model_name": "bert-base-uncased",
    "embed": {
        "dim": 768,
        "name": "word_embeddings",
    },
    "vocab_size": 30522,
    "segment_embed": {
        "dim": 768,
        "name": "token_type_embeddings",
    },
    "type_vocab_size": 2,
    "position_embed": {
        "dim": 768,
        "name": "position_embeddings",
    },
    "position_size": 512,
    "encoder": {
        "dim": 768,
        "embedding_dropout": 0.1,
        "multihead_attention": {
            "dropout_rate": 0.1,
            "name": "self",
            "num_heads": 12,
            "num_units": 768,
            "output_dim": 768,
            "use_bias": True,
        },
        "name": "encoder",
        "num_blocks": 12,
        "eps": 1e-12,
        "poswise_feedforward": {
            "layers": [
                {
                    "kwargs": {
                        "in_features": 768,
                        "out_features": 3072,
                        "bias": True,
                    },
                    "type": "Linear",
                },
                {"type": "BertGELU"},
                {
                    "kwargs": {
                        "in_features": 3072,
                        "out_features": 768,
                        "bias": True,
                    },
                    "type": "Linear",
                },
            ],
        },
        "residual_dropout": 0.1,
        "use_bert_config": True,
    },
    "hidden_size": 768,
    "initializer": None,
    "name": "bert_encoder",
}
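
if __name__ == "__main__":
    # Hedged smoke test (an assumption, not part of the original file):
    # `encoder` mirrors the default hparams of texar.torch's BERTEncoder,
    # so it can presumably be consumed directly as below.
    import torch

    bert = tx.modules.BERTEncoder(hparams=encoder)
    ids = torch.randint(0, encoder["vocab_size"], (2, 16))
    lengths = torch.full((2,), 16, dtype=torch.long)
    outputs, pooled = bert(inputs=ids, sequence_length=lengths)
    print(outputs.shape)   # torch.Size([2, 16, 768])
    print(pooled.shape)    # torch.Size([2, 768])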