# a full yaml file with all nequip options
# this is primarily intended to serve as documentation/reference for all options
# for a simpler yaml file containing all the necessary features to get you started, check out example.yaml
# Two folders will be used during the training: 'root'/process and 'root'/'run_name'
# run_name contains logfiles and saved models
# process contains processed data sets
# if 'root'/'run_name' exists, 'root'/'run_name'_'year'-'month'-'day'-'hour'-'min'-'s' will be used instead.
root: results/aspirin
run_name: example-run-full
seed: 0 # random number seed for numpy and torch
restart: false # set True for a restarted run
append: false # set True if a restarted run should append to the previous log file
default_dtype: float32 # type of float to use, e.g. float32 and float64
allow_tf32: True # whether to use TensorFloat32 if it is available
# network
r_max: 4.0 # cutoff radius in length units
num_layers: 6 # number of interaction blocks, we found 5-6 to work best
chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species
feature_irreps_hidden: 32x0o + 32x0e + 16x1o + 16x1e + 8x2o + 8x2e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
irreps_edge_sh: 0e + 1o + 2e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
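# as noted above, a single integer is accepted as shorthand for the full spherical harmonics up to that L_max;
# e.g. the commented line below should be equivalent to the explicit irreps used here:
# irreps_edge_sh: 2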
conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block
nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended
# scalar nonlinearities to use — available options are silu, ssp (shifted softplus), tanh, and abs.
# Different nonlinearities are specified for e (even) and o (odd) parity;
# note that only tanh and abs are correct for o (odd parity).
nonlinearity_scalars:
  e: ssp
  o: tanh
nonlinearity_gates:
  e: ssp
  o: abs
resnet: false # set true to make interaction block a resnet-style update
num_basis: 8 # number of basis functions used in the radial basis
BesselBasis_trainable: true # set true to train the bessel weights
PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function
# radial network
invariant_layers: 2 # number of radial layers, we found it important to keep this small, 1 or 2
invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster
avg_num_neighbors: null # number of neighbors to divide by, None => no normalization.
use_sc: true # use self-connection or not, usually gives big improvement
compile_model: false # whether to compile the constructed model to TorchScript
# to specify different parameters for each convolutional layer, try the example below
# layer1_use_sc: true # use the "layer{i}_" prefix to specify parameters for only one of the layers
# priority when a parameter is defined in more than one way:
# invariant_neurons < InteractionBlock_invariant_neurons < layer{i}_invariant_neurons
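# an illustrative (commented) sketch of the priority rule above; the values and layer index are arbitrary examples, not recommendations:
# invariant_neurons: 64 # plain key: applies everywhere
# InteractionBlock_invariant_neurons: 48 # overrides the plain key for the interaction blocks
# layer1_invariant_neurons: 32 # highest priority: overrides both, but only for layer 1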
# data set
# the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
# key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
# all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
# note that if your data set uses pbc, you need to also pass an array that maps to the nequip "pbc" key
dataset: npz # type of data set, can be npz or ase
dataset_url: http://quantum-machine.org/gdml/data/npz/aspirin_ccsd.zip # url to download the npz. optional
dataset_file_name: ./benchmark_data/aspirin_ccsd-train.npz # path to data set file
key_mapping:
  z: atomic_numbers # atomic species, integers
  E: total_energy # total potential energies to train to
  F: forces # atomic forces to train to
  R: pos # raw atomic positions
npz_fixed_field_keys: # fields that are repeated across different examples
  - atomic_numbers
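# per the pbc note above, a periodic npz data set also needs an entry that maps to the nequip "pbc" key,
# i.e. an extra line under key_mapping such as the one below (illustrative; the npz field name "pbc" is an assumption about your file):
#   pbc: pbc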
# As an alternative option to npz, you can also pass data as ASE Atoms objects
# This can often be easier to work with; simply make sure the ASE Atoms object
# has a calculator for which atoms.get_potential_energy() and atoms.get_forces() are defined
# dataset: ase
# dataset_file_name: xxx.xyz # needs to be a format accepted by ase.io.read
# ase_args: # any arguments needed by ase.io.read
#   format: extxyz
# logging
wandb: false # we recommend using wandb for logging; we turn it off here since it's optional
wandb_project: aspirin # project name used in wandb
wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated.
# if false, a new wandb run will be generated
verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive
log_batch_freq: 1 # batch frequency, how often to print training errors within the same epoch
log_epoch_freq: 1 # epoch frequency, how often to print and save the model
save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive.
save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
# training
n_train: 100 # number of training data
n_val: 50 # number of validation data
learning_rate: 0.01 # learning rate, we found values between 0.01 and 0.005 to work best - this is often one of the most important hyperparameters to tune
batch_size: 5 # batch size, we found it important to keep this small for most applications (1-5)
max_epochs: 1000000 # stop training after this many epochs
train_val_split: random # can be random or sequential. if sequential, the first n_train elements are used for training and the next n_val for validation; random is usually the right choice
shuffle: true # If true, the data loader will shuffle the data, usually a good idea
metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
use_ema: false # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
ema_decay: 0.999 # ema weight, commonly set to 0.999
ema_use_num_updates: true # whether to use number of updates when computing averages
# early stopping based on metrics values.
# LR, wall and any keys printed in the log file can be used.
# The key can start with Training or Validation. If not defined, the validation value will be used.
early_stopping_patiences: # stop early if a metric value has not decreased for n epochs
  Validation_loss: 50
  Training_loss: 100
  e_mae: 100
early_stopping_delta: # If delta is defined, a decrease smaller than delta will not be counted as a decrease
  Training_loss: 0.005
early_stopping_cumulative_delta: false # If True, the minimum value recorded will not be updated when the decrease is smaller than delta
early_stopping_lower_bounds: # stop early if a metric value is lower than the bound
  LR: 1.0e-10
early_stopping_upper_bounds: # stop early if a metric value is higher than the bound
  wall: 1.0e+100
# loss function
loss_coeffs: # different weights to use in a weighted loss function
  forces: 100 # for MD applications, we recommend a force weight of 100 and an energy weight of 1
  total_energy: 1 # alternatively, if energies are not of importance, a force weight of 1 and an energy weight of 0 also works.
# the default loss function is MSELoss; the name has to be exactly the same as in torch.nn.
# the only supported targets are forces and total_energy
# here are some examples of other ways to declare different types of loss functions, depending on your application:
# loss_coeffs:
#   total_energy: MSELoss
#
# loss_coeffs:
#   total_energy:
#     - 3.0
#     - MSELoss
#
# loss_coeffs:
#   forces:
#     - 1.0
#     - PerSpeciesL1Loss
#
# loss_coeffs: total_energy
#
# loss_coeffs:
#   total_energy:
#     - 3.0
#     - L1Loss
#   forces: 1.0
# output metrics
metrics_components:
  - - forces # key
    - rmse # "rmse" or "mse"
    - PerSpecies: True # if true, per species contribution is counted separately
      report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
  - - forces
    - mae
    - PerSpecies: True
      report_per_component: False
  - - total_energy
    - mae
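# an illustrative (commented) extra component that flips the flags used above, reporting per-component (fx, fy, fz)
# rather than per-species statistics:
#   - - forces
#     - rmse
#     - PerSpecies: False
#       report_per_component: True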
# optimizer, may be any optimizer defined in torch.optim
# the name `optimizer_name` is case sensitive
optimizer_name: Adam # default optimizer is Adam in the amsgrad mode
optimizer_amsgrad: true
optimizer_betas: !!python/tuple
  - 0.9
  - 0.999
optimizer_eps: 1.0e-08
optimizer_weight_decay: 0
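# an illustrative (commented) sketch of switching to another torch.optim optimizer; we assume extra keyword
# arguments follow the same optimizer_<kwarg> pattern used for the Adam options above:
# optimizer_name: SGD
# optimizer_momentum: 0.9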
# weight initialization
# this can be the importable name of any function that can be `model.apply`ed to initialize some weights in the model. NequIP provides a number of useful initializers:
# For more details please see the docstrings of the individual initializers
#model_initializers:
# - nequip.utils.initialization.uniform_initialize_fcs
# - nequip.utils.initialization.uniform_initialize_equivariant_linears
# - nequip.utils.initialization.uniform_initialize_tp_internal_weights
# - nequip.utils.initialization.xavier_initialize_fcs
# - nequip.utils.initialization.(unit_)orthogonal_initialize_equivariant_linears
# - nequip.utils.initialization.(unit_)orthogonal_initialize_fcs
# - nequip.utils.initialization.(unit_)orthogonal_initialize_e3nn_fcs
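# an illustrative (commented) selection combining two of the initializers listed above:
# model_initializers:
#   - nequip.utils.initialization.xavier_initialize_fcs
#   - nequip.utils.initialization.uniform_initialize_tp_internal_weights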
# lr scheduler, currently only supports the two options listed below; if you need more please file an issue
# first: on-plateau, reduce lr by a factor of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epochs
lr_scheduler_name: ReduceLROnPlateau
lr_scheduler_patience: 100
lr_scheduler_factor: 0.5
# second: cosine annealing with warm restarts
# lr_scheduler_name: CosineAnnealingWarmRestarts
# lr_scheduler_T_0: 10000
# lr_scheduler_T_mult: 2
# lr_scheduler_eta_min: 0
# lr_scheduler_last_epoch: -1
# we provide a series of options to shift and scale the data
# these are for advanced use and usually the defaults work very well
# the default is to scale the energies and forces by the force standard deviation and to shift the energy by its mean
# in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
# whether to apply a shift and scale, defined per-species, to the atomic energies
PerSpeciesScaleShift_enable: false
# if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
PerSpeciesScaleShift_trainable: true
# optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0]
# optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0]
# global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
global_rescale_shift: dataset_energy_mean
# global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
# If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
global_rescale_scale: dataset_force_rms
# whether the shift of the final global energy rescaling should be trainable
trainable_global_rescale_shift: false
# whether the scale of the final global energy rescaling should be trainable
trainable_global_rescale_scale: false
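# illustrative (commented) alternatives based on the descriptions above; the explicit number is a placeholder, not a recommendation:
# global_rescale_shift: null # disable the global energy shift entirely
# global_rescale_scale: 1.5 # or pass a number to use it directly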
# Options for e3nn's set_optimization_defaults. A dict:
# e3nn_optimization_defaults:
#   explicit_backward: True