# a full yaml file with all nequip options
# this is primarily intended to serve as documentation/reference for all options
# for a simpler yaml file containing all the necessary features to get you started, check out example.yaml
# Two folders will be used during the training: 'root'/process and 'root'/'run_name'
# run_name contains logfiles and saved models
# process contains processed data sets
# if 'root'/'run_name' exists, 'root'/'run_name'_'year'-'month'-'day'-'hour'-'min'-'s' will be used instead.
root: results/aspirin
run_name: example-run-full
seed: 0 # random number seed for numpy and torch
restart: false # set True for a restarted run
append: false # set True if a restarted run should append to the previous log file
default_dtype: float32 # type of float to use, e.g. float32 and float64
allow_tf32: True # whether to use TensorFloat32 if it is available
# network
r_max: 4.0 # cutoff radius in length units
num_layers: 6 # number of interaction blocks, we found 5-6 to work best
chemical_embedding_irreps_out: 32x0e # irreps for the chemical embedding of species
feature_irreps_hidden: 32x0o + 32x0e + 16x1o + 16x1e + 8x2o + 8x2e # irreps used for hidden features, here we go up to lmax=2, with even and odd parities
irreps_edge_sh: 0e + 1o + 2e # irreps of the spherical harmonics used for edges. If a single integer, indicates the full SH up to L_max=that_integer
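# as noted above, a single integer is accepted as shorthand for the full spherical harmonics up to that L_max;
# e.g. the commented line below should be equivalent to the explicit irreps used here:
# irreps_edge_sh: 2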
conv_to_output_hidden_irreps_out: 16x0e # irreps used in hidden layer of output block
nonlinearity_type: gate # may be 'gate' or 'norm', 'gate' is recommended
# scalar nonlinearities to use — available options are silu, ssp (shifted softplus), tanh, and abs.
# Different nonlinearities are specified for e (even) and o (odd) parity;
# note that only tanh and abs are correct for o (odd parity).
nonlinearity_scalars:
  e: ssp
  o: tanh
nonlinearity_gates:
  e: ssp
  o: abs
resnet: false # set true to make interaction block a resnet-style update
num_basis: 8 # number of basis functions used in the radial basis
BesselBasis_trainable: true # set true to train the bessel weights
PolynomialCutoff_p: 6 # p-exponent used in polynomial cutoff function
# radial network
invariant_layers: 2 # number of radial layers, we found it important to keep this small, 1 or 2
invariant_neurons: 64 # number of hidden neurons in radial function, smaller is faster
avg_num_neighbors: null # number of neighbors to divide by, None => no normalization.
use_sc: true # use self-connection or not, usually gives big improvement
compile_model: false # whether to compile the constructed model to TorchScript
# to specify different parameters for each convolutional layer, try the example below
# layer1_use_sc: true # use the "layer{i}_" prefix to specify parameters for only one of the layers
# priority when a parameter is defined in more than one way:
# invariant_neurons < InteractionBlock_invariant_neurons < layer{i}_invariant_neurons
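# an illustrative (commented) sketch of the priority rule above; the values and layer index are arbitrary examples, not recommendations:
# invariant_neurons: 64 # plain key: applies everywhere
# InteractionBlock_invariant_neurons: 48 # overrides the plain key for the interaction blocks
# layer1_invariant_neurons: 32 # highest priority: overrides both, but only for layer 1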
# data set
# the keys used need to be stated at least once in key_mapping, npz_fixed_field_keys or npz_keys
# key_mapping is used to map the key in the npz file to the NequIP default values (see data/_key.py)
# all arrays are expected to have the shape of (nframe, natom, ?) except the fixed fields
# note that if your data set uses pbc, you need to also pass an array that maps to the nequip "pbc" key
dataset: npz # type of data set, can be npz or ase
dataset_url: http://quantum-machine.org/gdml/data/npz/aspirin_ccsd.zip # url to download the npz. optional
dataset_file_name: ./benchmark_data/aspirin_ccsd-train.npz # path to data set file
key_mapping:
  z: atomic_numbers # atomic species, integers
  E: total_energy # total potential energies to train to
  F: forces # atomic forces to train to
  R: pos # raw atomic positions
npz_fixed_field_keys: # fields that are repeated across different examples
  - atomic_numbers
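# per the pbc note above, a periodic npz data set also needs an entry that maps to the nequip "pbc" key,
# i.e. an extra line under key_mapping such as the one below (illustrative; the npz field name "pbc" is an assumption about your file):
#   pbc: pbc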
# As an alternative option to npz, you can also pass data as ASE Atoms objects
# This can often be easier to work with; simply make sure the ASE Atoms object
# has a calculator for which atoms.get_potential_energy() and atoms.get_forces() are defined
# dataset: ase
# dataset_file_name: xxx.xyz # needs to be a format accepted by ase.io.read
# ase_args: # any arguments needed by ase.io.read
#   format: extxyz
# logging
wandb: false # we recommend using wandb for logging; we turn it off here since it's optional
wandb_project: aspirin # project name used in wandb
wandb_resume: true # if true and restart is true, wandb run data will be restarted and updated.
# if false, a new wandb run will be generated
verbose: info # the same as python logging, e.g. warning, info, debug, error. case insensitive
log_batch_freq: 1 # batch frequency, how often to print training errors within the same epoch
log_epoch_freq: 1 # epoch frequency, how often to print and save the model
save_checkpoint_freq: -1 # frequency to save the intermediate checkpoint. no saving when the value is not positive.
save_ema_checkpoint_freq: -1 # frequency to save the intermediate ema checkpoint. no saving when the value is not positive.
# training
n_train: 100 # number of training data
n_val: 50 # number of validation data
learning_rate: 0.01 # learning rate, we found values between 0.01 and 0.005 to work best - this is often one of the most important hyperparameters to tune
batch_size: 5 # batch size, we found it important to keep this small for most applications (1-5)
max_epochs: 1000000 # stop training after this many epochs
train_val_split: random # can be random or sequential. if sequential, the first n_train elements are used for training and the next n_val for validation; random is usually the right choice
shuffle: true # If true, the data loader will shuffle the data, usually a good idea
metrics_key: loss # metrics used for scheduling and saving best model. Options: loss, or anything that appears in the validation batch step header, such as f_mae, f_rmse, e_mae, e_rmse
use_ema: false # if true, use exponential moving average on weights for val/test, usually helps a lot with training, in particular for energy errors
ema_decay: 0.999 # ema weight, commonly set to 0.999
ema_use_num_updates: true # whether to use number of updates when computing averages
# early stopping based on metrics values.
# LR, wall and any keys printed in the log file can be used.
# The key can start with Training or Validation. If not defined, the validation value will be used.
early_stopping_patiences: # stop early if a metric value has not decreased for n epochs
  Validation_loss: 50
  Training_loss: 100
  e_mae: 100
early_stopping_delta: # If delta is defined, a decrease smaller than delta will not be counted as a decrease
  Training_loss: 0.005
early_stopping_cumulative_delta: false # If True, the minimum value recorded will not be updated when the decrease is smaller than delta
early_stopping_lower_bounds: # stop early if a metric value is lower than the bound
  LR: 1.0e-10
early_stopping_upper_bounds: # stop early if a metric value is higher than the bound
  wall: 1.0e+100
# loss function
loss_coeffs: # different weights to use in a weighted loss function
  forces: 100 # for MD applications, we recommend a force weight of 100 and an energy weight of 1
  total_energy: 1 # alternatively, if energies are not of importance, a force weight of 1 and an energy weight of 0 also works.
# the default loss function is MSELoss; the name has to be exactly the same as in torch.nn.
# the only supported targets are forces and total_energy
# here are some examples of other ways to declare different types of loss functions, depending on your application:
# loss_coeffs:
#   total_energy: MSELoss
#
# loss_coeffs:
#   total_energy:
#     - 3.0
#     - MSELoss
#
# loss_coeffs:
#   forces:
#     - 1.0
#     - PerSpeciesL1Loss
#
# loss_coeffs: total_energy
#
# loss_coeffs:
#   total_energy:
#     - 3.0
#     - L1Loss
#   forces: 1.0
# output metrics
metrics_components:
  - - forces # key
    - rmse # "rmse" or "mse"
    - PerSpecies: True # if true, per species contribution is counted separately
      report_per_component: False # if true, statistics on each component (i.e. fx, fy, fz) will be counted separately
  - - forces
    - mae
    - PerSpecies: True
      report_per_component: False
  - - total_energy
    - mae
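# an illustrative (commented) extra component that flips the flags used above, reporting per-component (fx, fy, fz)
# rather than per-species statistics:
#   - - forces
#     - rmse
#     - PerSpecies: False
#       report_per_component: True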
# optimizer, may be any optimizer defined in torch.optim
# the name `optimizer_name` is case sensitive
optimizer_name: Adam # default optimizer is Adam in the amsgrad mode
optimizer_amsgrad: true
optimizer_betas: !!python/tuple
  - 0.9
  - 0.999
optimizer_eps: 1.0e-08
optimizer_weight_decay: 0
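# an illustrative (commented) sketch of switching to another torch.optim optimizer; we assume extra keyword
# arguments follow the same optimizer_<kwarg> pattern used for the Adam options above:
# optimizer_name: SGD
# optimizer_momentum: 0.9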
# weight initialization
# this can be the importable name of any function that can be `model.apply`ed to initialize some weights in the model. NequIP provides a number of useful initializers:
# For more details please see the docstrings of the individual initializers
#model_initializers:
# - nequip.utils.initialization.uniform_initialize_fcs
# - nequip.utils.initialization.uniform_initialize_equivariant_linears
# - nequip.utils.initialization.uniform_initialize_tp_internal_weights
# - nequip.utils.initialization.xavier_initialize_fcs
# - nequip.utils.initialization.(unit_)orthogonal_initialize_equivariant_linears
# - nequip.utils.initialization.(unit_)orthogonal_initialize_fcs
# - nequip.utils.initialization.(unit_)orthogonal_initialize_e3nn_fcs
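# an illustrative (commented) selection combining two of the initializers listed above:
# model_initializers:
#   - nequip.utils.initialization.xavier_initialize_fcs
#   - nequip.utils.initialization.uniform_initialize_tp_internal_weights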
# lr scheduler, currently only supports the two options listed below; if you need more please file an issue
# first: on-plateau, reduce lr by a factor of lr_scheduler_factor if metrics_key hasn't improved for lr_scheduler_patience epochs
lr_scheduler_name: ReduceLROnPlateau
lr_scheduler_patience: 100
lr_scheduler_factor: 0.5
# second: cosine annealing with warm restarts
# lr_scheduler_name: CosineAnnealingWarmRestarts
# lr_scheduler_T_0: 10000
# lr_scheduler_T_mult: 2
# lr_scheduler_eta_min: 0
# lr_scheduler_last_epoch: -1
# we provide a series of options to shift and scale the data
# these are for advanced use and usually the defaults work very well
# the default is to scale the energies and forces by the force standard deviation and to shift the energy by its mean
# in certain cases, it can be useful to have a trainable shift/scale and to also have species-dependent shifts/scales for each atom
# whether to apply a shift and scale, defined per-species, to the atomic energies
PerSpeciesScaleShift_enable: false
# if the PerSpeciesScaleShift is enabled, whether the shifts and scales are trainable
PerSpeciesScaleShift_trainable: true
# optional initial atomic energy shift for each species. order should be the same as the allowed_species used in train.py. Defaults to zeros.
PerSpeciesScaleShift_shifts: [0.0, 0.0, 0.0]
# optional initial atomic energy scale for each species. order should be the same as the allowed_species used in train.py. Defaults to ones.
PerSpeciesScaleShift_scales: [1.0, 1.0, 1.0]
# global energy shift. When "dataset_energy_mean" (the default), the mean energy of the dataset. When None, disables the global shift. When a number, used directly.
global_rescale_shift: dataset_energy_mean
# global energy scale. When "dataset_force_rms", the RMS of force components in the dataset. When "dataset_energy_std", the stdev of energies in the dataset. When None, disables the global scale. When a number, used directly.
# If not provided, defaults to either dataset_force_rms or dataset_energy_std, depending on whether forces are being trained.
global_rescale_scale: dataset_force_rms
# whether the shift of the final global energy rescaling should be trainable
trainable_global_rescale_shift: false
# whether the scale of the final global energy rescaling should be trainable
trainable_global_rescale_scale: false
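# illustrative (commented) alternatives based on the descriptions above; the explicit number is a placeholder, not a recommendation:
# global_rescale_shift: null # disable the global energy shift entirely
# global_rescale_scale: 1.5 # or pass a number to use it directly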
# Options for e3nn's set_optimization_defaults. A dict:
# e3nn_optimization_defaults:
#   explicit_backward: True