model_configuration.schema.json
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://github.com/high-dimensional/3d_very_deep_vae/model_configuration.schema.json",
"title": "Model configuration",
"description": "Schema for JSON file specifying configuration of a very deep variational autoencoder model and training run",
"type": "object",
"additionalProperties": false,
"properties": {
"total_epochs": {
"description": "Total number of epochs to train model for",
"type": "integer",
"exclusiveMinimum": 0
},
"batch_size": {
"description": "Number of training points in each minibatch",
"type": "integer",
"exclusiveMinimum": 0
},
"resolution": {
"description": "Resolution of (volumetric) images to train model to generate along all dimensions. Must be an integer power of 2.",
"type": "integer",
"exclusiveMinimum": 0
},
"channels": {
"description": "Number of output channels in the encoder's residual network (ResNet) blocks. Length must be equal to base-2 logarithm of `resolution` plus one.",
"type": "array",
"items": {
"type": "integer",
"exclusiveMinimum": 0
}
},
"kernel_sizes_bottom_up": {
"description": "At each resolution (decreasing order), the side lengths of the encoder's kernels. Length must be equal to base-2 logarithm of `resolution` plus one.",
"type": "array",
"items": {
"type": "integer",
"exclusiveMinimum": 0
}
},
"kernel_sizes_top_down": {
"description": "At each resolution (decreasing order), the side lengths of the decoder's kernels. Length must be equal to base-2 logarithm of `resolution` plus one.",
"type": "array",
"items": {
"type": "integer",
"exclusiveMinimum": 0
}
},
"channels_hidden": {
"description": "Number of intermediate channels in the encoder's residual network (ResNet) blocks. Length must be equal to base-2 logarithm of `resolution` plus one.",
"type": "array",
"items": {
"type": "integer",
"exclusiveMinimum": 0
}
},
"channels_top_down": {
"description": "Number of output channels in the decoder's residual network (ResNet) blocks. Length must be equal to base-2 logarithm of `resolution` plus one.",
"type": "array",
"items": {
"type": "integer",
"exclusiveMinimum": 0
}
},
"channels_hidden_top_down": {
"description": "Number of intermediate channels in the decoder's residual network (ResNet) blocks. Length must be equal to base-2 logarithm of `resolution` plus one.",
"type": "array",
"items": {
"type": "integer",
"exclusiveMinimum": 0
}
},
"latent_feature_maps_per_resolution": {
"description": "Number of latent feature maps at each resolution. Length must be equal to base-2 logarithm of `resolution` plus one.",
"type": "array",
"items": {
"type": "integer",
"exclusiveMinimum": 0
}
},
"channels_per_latent": {
"description": "Number of channels per latent feature map. Length must be equal to base-2 logarithm of `resolution` plus one.",
"type": "array",
"items": {
"type": "integer",
"exclusiveMinimum": 0
}
},
"random_seed": {
"description": "Integer seed to use to initialise state of pseudo-random number generator(s) used in training for reproducible runs. Using a `null` value will result in non-deterministic training runs.",
"default": null,
"oneOf": [
{"type": "null"},
{"type": "integer", "minimum": 0, "maximum": 4294967295}
]
},
"max_niis_to_use": {
"description": "The maximum number of NiFTI files to use in a training epoch. Use this to define a shorter epoch, for example to quickly test visualisations are being saved correctly. Using a `null` value will result in all available files being used.",
"default": null,
"oneOf": [
{"type": "null"},
{"type": "integer", "exclusiveMinimum": 0}
]
},
"warmup_iterations": {
"description": "Iterations to wait before skipping excessively large gradient updates",
"type": "integer",
"exclusiveMinimum": 0,
"default": 50
},
"plot_recons_period": {
"description": "Frequency (in epochs) with which to plot reconstructions",
"type": "integer",
"exclusiveMinimum": 0,
"default": 1
},
"subjects_to_plot": {
"description": "Number of subjects to include when plotting reconstructions",
"type": "integer",
"exclusiveMinimum": 0,
"default": 4
},
"validation_period": {
"description": "Frequency (in epochs) with which to evaluate the model on the validation set",
"type": "integer",
"exclusiveMinimum": 0,
"default": 1
},
"save_period": {
"description": "Frequency (in epochs) with which to save checkpoints",
"type": "integer",
"exclusiveMinimum": 0,
"default": 1
},
"l2_reg_coeff": {
"description": "Coefficient scaling L2 regularization term in objective",
"type": "number",
"minimum": 0,
"default": 1e-4
},
"learning_rate": {
"description": "Scalar controlling magnitude of stochastic gradient steps",
"type": "number",
"minimum": 0,
"default": 1e-3
},
"train_frac": {
"description": "Fraction of data to use for training with remainder used for validation",
"type": "number",
"minimum": 0,
"maximum": 1,
"default": 0.95
},
"gradient_clipping_value": {
"description": "Upper limit for the gradient norm, used when clamping gradients before applying gradient updates",
"type": "number",
"exclusiveMinimum": 0,
"default": 1e2
},
"gradient_skipping_value": {
"description": "If the gradient norm exceeds this value, skip that iteration's gradient update",
"type": "number",
"exclusiveMinimum": 0,
"default": 1e12
},
"scale_hidden_clamp_bounds": {
"description": "Lower and upper bound on the standard deviation of the prior and posterior Gaussian distributions of the latent variables",
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 2,
"maxItems": 2,
"default": [0.001, 1]
},
"scale_output_clamp_bounds": {
"description": "Lower and upper bound on scale (standard deviation of the Gaussian distribution of the input given the latent",
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 2,
"maxItems": 2,
"default": [0.01, 1]
},
"latent_feature_maps_per_resolution_weight_sharing": {
"description": "Either, an array of boolean flags specifying whether to use a shared set of weights to predict the latents at that resolution, one per resolution in decreasing order (that is array length must be equal to base-2 logarithm of `resolution` plus one), or, one of the strings `\"all\"` or `\"none\"`, corresponding to flags being `true` or `false` across all resolutions respectively.",
"oneOf": [
{
"type": "string",
"enum": ["none", "all"]
},
{
"type": "array",
"items": {
"type": "boolean"
},
"minItems": 1
}
],
"default": "none"
},
"latents_to_use": {
"description": "Either, an array of boolean flags specifying whether to use each latent feature map (instead of the the deterministic residual network block output), one per latent feature map in order they appear in per-resolution blocks (that is array length must be equal to sum of values in `latent_feature_maps_per_resolution`), or, one of the strings `\"all\"` or `\"none\"`, corresponding to flags being `true` or `false` across all feature maps respectively.",
"oneOf": [
{
"type": "string",
"enum": ["none", "all"]
},
{
"type": "array",
"items": {
"type": "boolean"
},
"minItems": 1
}
],
"default": "all"
},
"latents_to_optimise": {
"description": "Either, an array of boolean flags specifying whether to optimise the parameters for the network components controlling each latent feature map, one per latent feature map in order they appear in per-resolution blocks (that is array length must be equal to sum of values in `latent_feature_maps_per_resolution`), or, one of the strings `\"all\"` or `\"none\"`, corresponding to flags being `true` or `false` across all feature maps respectively.",
"oneOf": [
{
"type": "string",
"enum": ["none", "all"]
},
{
"type": "array",
"items": {
"type": "boolean"
},
"minItems": 1
}
],
"default": "all"
},
"half_precision": {
"description": "Whether to train model using 16-bit floating point precision",
"type": "boolean",
"default": false
},
"output_activation_function": {
"description": "Which activation function to use in computing location of Gaussian distribution given latents. Choices are `\"tanh\"` corresponding to hyperbolic tangent activation function (with range `[-1, 1]`), `\"sigmoid\"` corresponding to logistic sigmoid (with range `[0, 1]`) or `\"identity\"` corresponding to identity function.",
"type": "string",
"enum": ["tanh", "sigmoid", "identity"],
"default": "tanh"
},
"plot_gradient_norms": {
"description": "Plot the norms of the gradients after each epoch",
"type": "boolean",
"default": true
},
"resume_from_checkpoint": {
"description": "Resume training from a checkpoint",
"type": "boolean",
"default": false
},
"restore_optimiser": {
"description": "When resuming training, restore the state of the optimiser (set to false to reset the optimiser's parameters and start training from epoch 1)",
"type": "boolean",
"default": true
},
"keep_every_checkpoint": {
"description": "Save, and keep, a checkpoint every epoch rather than just keeping the latest one",
"type": "boolean",
"default": true
},
"predict_x_scale": {
"description": "Model the scale, not just the location, of the Gaussian distribution of the input given its latent",
"type": "boolean",
"default": true
},
"use_precision_reweighting": {
"description": "Re-weight the locations and scales of the prior and posterior distributions of the latents according to the scheme in the paper _Ladder variational autoencoders_ (Sønderby et al. 2016)",
"type": "boolean",
"default": false
},
"verbose": {
"description": "Print more detail in output during training",
"type": "boolean",
"default": true
},
"bottleneck_resnet_encoder": {
"description": "In the encoder, use a three layer Resnet block with a middle layer that has fewer channels than the output layer (the bottleneck). Alternatively, use a two-layer Resnet block whose layers have equal numbers of output channels",
"type": "boolean",
"default": true
},
"normalise_weight_by_depth": {
"description": "Normalise each convolution block's randomly initialised kernel parameters by the (square root of the) depth of that block.",
"type": "boolean",
"default": true
},
"zero_biases": {
"description": "Set each convolution block's bias to zero after initialising it",
"type": "boolean",
"default": true
},
"use_rezero": {
"description": "Use skip connections where the 'non-skip' part of the layer is multiplied by a scalar initialised to zero, as described in the paper _ReZero is all you need: fast convergence at large depth_ (Bachlechner et al. 2021).",
"type": "boolean",
"default": false
},
"veto_batch_norm": {
"description": "Do not use batch normalisation anywhere",
"type": "boolean",
"default": true
},
"veto_transformations": {
"description": "Do not apply augmentations to the training data",
"type": "boolean",
"default": false
},
"convolutional_downsampling": {
"description": "Down-sample using stride-two convolutions, rather than x2 nearest neighbour downsampling",
"type": "boolean",
"default": false
},
"predict_x_scale_with_sigmoid": {
"description": "Predict the scale of the Gaussian distribution of the input given its latent using a (scaled) sigmoid, rather than predicting the natural logarithm of the scale then exponentiating",
"type": "boolean",
"default": true
},
"only_use_one_conv_block_at_top": {
"description": "Use a truncated sequence of layers to predict from the latents the location and scale of the Gaussian distribution of the input given its latent",
"type": "boolean",
"default": false
},
"separate_hidden_loc_scale_convs": {
"description": "Do not just use one convolutional block, with a two-channel output, to predict the location and scale of the prior and posterior Gaussian distributions of the latents. Instead use separate blocks for the location and scale.",
"type": "boolean",
"default": false
},
"separate_output_loc_scale_convs": {
"description": "Do not just use one convolutional block, with a two-channel output, to predict the location and scale of the prior and posterior Gaussian distributions of the input given ts latent. Instead use separate blocks for the location and scale.",
"type": "boolean",
"default": false
},
"apply_augmentations_to_validation_set": {
"description": "Apply to the validation set the same augmentations applied to the training set",
"type": "boolean",
"default": false
},
"visualise_training_pipeline_before_starting": {
"description": "Plot examples of the augmented training points before training begins",
"type": "boolean",
"default": true
}
},
"required": [
"total_epochs",
"batch_size",
"resolution",
"channels",
"kernel_sizes_bottom_up",
"channels_hidden",
"channels_top_down",
"channels_hidden_top_down",
"latent_feature_maps_per_resolution",
"channels_per_latent",
"kernel_sizes_top_down"
]
}
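
For illustration only (this is not part of the schema file itself): a minimal configuration that should validate against the schema above might look as follows. All numeric values are placeholder assumptions chosen to be internally consistent; with `resolution` set to 32, each per-resolution array must contain log2(32) + 1 = 6 entries, and only the keys listed under `required` must be present (optional keys such as `random_seed` fall back to their defaults when omitted).

{
  "total_epochs": 100,
  "batch_size": 2,
  "resolution": 32,
  "channels": [10, 20, 40, 80, 160, 320],
  "channels_hidden": [10, 20, 40, 80, 160, 320],
  "channels_top_down": [10, 20, 40, 80, 160, 320],
  "channels_hidden_top_down": [10, 20, 40, 80, 160, 320],
  "channels_per_latent": [2, 2, 2, 2, 2, 2],
  "latent_feature_maps_per_resolution": [1, 1, 1, 1, 1, 1],
  "kernel_sizes_bottom_up": [3, 3, 3, 3, 2, 1],
  "kernel_sizes_top_down": [3, 3, 3, 3, 2, 1],
  "random_seed": 42
}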