mockingjay:
  downsample_rate: 1 # stack this many consecutive feature vectors to reduce the length of input sequences by this factor.
  hidden_size: 768 # Size of the encoder layers and the pooler layer.
  num_hidden_layers: 3 # Number of hidden layers in the Transformer encoder.
  num_attention_heads: 12 # Number of attention heads for each attention layer in the Transformer encoder.
  intermediate_size: 3072 # The size of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
  hidden_act: "gelu" # The non-linear activation function (function or string) in the encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
  hidden_dropout_prob: 0.1 # The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
  attention_probs_dropout_prob: 0.1 # The dropout ratio for the attention probabilities.
  initializer_range: 0.02 # The stddev of the truncated_normal_initializer for initializing all weight matrices.
  layer_norm_eps: "1e-12" # The epsilon used by LayerNorm.
  mask_proportion: 0.15 # mask this proportion of all spectrogram frames in each sequence at random during MAM training
  mask_consecutive: 7 # mask this many consecutive frames at each masked position
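  # A rough sketch of how the two masking knobs above interact (inferred from
  # their comments, not taken from the training code): about
  # mask_proportion * seq_len frames are masked per sequence, chosen in runs of
  # mask_consecutive frames, i.e. roughly (0.15 * seq_len) / 7 masked spans
  # per utterance.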
optimizer:
  learning_rate: "4e-4" # Learning rate for the optimizer
  loss_scale: 0 # Loss scale to improve fp16 numeric stability. Only used when apex is set to True. 0: dynamic loss scaling. positive power of 2: static loss scaling.
  warmup_proportion: 0.07 # Proportion of training over which to perform linear learning rate warmup.
  gradient_accumulation_steps: 1 # Number of update steps to accumulate before performing a backward/update pass
  gradient_clipping: 1.0 # Maximum gradient norm
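  # Note that the effective batch size is dataloader.batch_size multiplied by
  # gradient_accumulation_steps. A hypothetical tweak for memory-constrained
  # GPUs (an illustration, not a recommended setting):
  #   gradient_accumulation_steps: 4 # 6 * 4 = effective batch size of 24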
dataloader:
  n_jobs: 6 # Number of subprocesses (workers) for the torch DataLoader
  batch_size: 6 # training batch size
  dev_batch_size: 16 # used for dev/test splits
  max_timestep: 3000 # Max length of audio features, in frames (0 for no restriction)
  max_label_len: 400 # Max length of output sequences (0 for no restriction)
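  # With downsample_rate R > 1, each utterance of up to max_timestep frames
  # would be stacked into roughly max_timestep / R input vectors of size
  # R * feature_dim (a sketch following the stacking described in the
  # mockingjay section above, not a formula taken from the code).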
  # for sentiment
  sentiment_config:
    dataset: 'mosei' # supported datasets: 'mosi', 'mosei'
    mosi:
      path: 'data/mosi_mel160'
      label_mode: 'positive_negative' # supported modes: 'positive_negative', 'original', 'regression'
      standard_split: False
      # if standard_split is set to False
      random_seed: 1122
      train_ratio: 0.9
    mosei:
      path: 'data/mosei'
      feature: 'mel160'
      label_mode: 'positive_negative' # supported modes: 'positive_negative', 'original', 'regression'
      standard_split: False
      # if standard_split is set to False
      random_seed: 29
      split_by: 'unsegmented' # 'unsegmented' or 'segmented'
      split_ratio: 0.9
      max_time: 20 # unit: sec
      min_time: 1 # unit: sec
      truncate_length: 5000 # unit: timesteps
      sentiment_threshold: 0.1
      sample_seed: 29
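  # To train on CMU-MOSI instead, switch the dataset key so the 'mosi'
  # sub-block above takes effect (an illustrative variant using only the
  # supported values listed above):
  #   sentiment_config:
  #     dataset: 'mosi'
  #     mosi:
  #       label_mode: 'regression'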
  # LIBRISPEECH SETTINGS
  data_path: 'data/libri_mel160_subword5000' # Source data path
  target_path: 'data/libri_linear1025_subword5000' # Target data path; for reconstruction to a different feature type, set the dataset to 'librispeech_mel_linear'.
  phone_path: 'data/libri_phone' # Phone boundary label data path for the phone classification task.
  train_set: ['train-clean-360']
  dev_set: ['dev-clean']
  test_set: ['test-clean']
  train_proportion: 1.0 # Currently only affects the phone classification task; use this proportion of `train_set` for downstream training to demonstrate Mockingjay's generality
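  # A possible way to scale up pre-training data: LibriSpeech also ships
  # 'train-clean-100' and 'train-other-500' splits (standard LibriSpeech split
  # names; this assumes they have been preprocessed into data_path in the same
  # way as 'train-clean-360'):
  #   train_set: ['train-clean-100', 'train-clean-360', 'train-other-500']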
solver:
  # Training options
  apex: False # Use APEX (see https://github.com/NVIDIA/apex for more details)
  total_steps: 500000 # total steps for training; a step is one batch update
  log_step: 100 # log training status every this many training steps
  dev_step: 10000 # evaluate on the dev set every this many training steps
  duo_feature: False # Use different input / output features during training
  # models
  load_model_list: ['SpecHead', 'Mockingjay'] # load the components in the list for test/eval
  max_keep: 10 # maximum number of model checkpoints to keep during training
  save_step: 10000 # save the model every this many training steps
downstream: # downstream model config
  linear:
    input_dim: 'None' # an `int`; if set to 'None', input_dim is inferred automatically from the mockingjay settings or the mel-preprocessing dimensions
    hidden_size: 768
    drop: 0.2
    select_hidden: 'last' # supported modes: ['last', 'first', 'average', 'weighted_sum', 'weighted_sum_norm']
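  # A plausible probing variant that uses all encoder layers rather than only
  # the last one (restricted to the select_hidden modes listed above; the
  # exact weighting behaviour is an assumption, not taken from the code):
  #   select_hidden: 'weighted_sum' # learned weighting over all hidden layers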
  rnn:
    mode: 'classification' # supported modes: 'classification', 'regression'
    input_dim: 'None' # an `int`; if set to 'None', input_dim is inferred automatically from the mockingjay settings or the mel-preprocessing dimensions
    select_hidden: 'last' # supported modes: ['last', 'first', 'average', 'weighted_sum', 'weighted_sum_norm']
    sample_rate: 5 # an `int`; sample every this many frames
    pre_linear_dims: []
    hidden_size: 32 # size of the hidden layer
    post_linear_dims: [32]
    drop: 0.5 # The dropout ratio
  # Training options
  learning_rate: "4e-3" # Learning rate for the optimizer: ['4e-3' for fine-tuning, '4e-3' for regular downstream task training]
  total_steps: 29078 # total steps for training; a step is one batch update: [29078 for fine-tuning, 500000 for regular downstream task training]
  log_step: 500 # log training status every this many training steps
  dev_step: 5000 # [58156 for fine-tuning, 10000 for regular downstream task training]
  evaluation: 'test' # can be 'dev' or 'test'; shows inference results right after saving the model
  # models
  load_model_list: ['Classifier'] # load the components in the list for test/eval
  max_keep: 20 # maximum number of model checkpoints to keep during training
  save_step: 2000 # save the model every this many training steps
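# A minimal sketch of consuming this config (assuming PyYAML; note that
# scientific-notation values like "4e-4" are quoted strings in this file and
# need an explicit float() cast):
#   import yaml
#   with open('mockingjay_libri_MelBase.yaml') as f:
#       config = yaml.safe_load(f)
#   lr = float(config['optimizer']['learning_rate']) # "4e-4" -> 0.0004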