diff --git a/.github/workflows/architecture-tests.yml b/.github/workflows/architecture-tests.yml index f7f2b1e9e..68f7bb68f 100644 --- a/.github/workflows/architecture-tests.yml +++ b/.github/workflows/architecture-tests.yml @@ -11,7 +11,6 @@ jobs: strategy: matrix: include: - - architecture-name: alchemical-model - architecture-name: gap - architecture-name: soap-bpnn - architecture-name: pet diff --git a/CODEOWNERS b/CODEOWNERS index 72f9ef1f9..f2eccb263 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -3,7 +3,6 @@ # is modified. **/soap_bpnn @frostedoyster -**/alchemical_model @abmazitov **/pet @abmazitov **/gap @DavideTisi **/nanopet @frostedoyster diff --git a/README.rst b/README.rst index 0016f8c7b..aa0b887c7 100644 --- a/README.rst +++ b/README.rst @@ -64,9 +64,6 @@ model. Positions (SOAP). * - SOAP BPNN - A Behler-Parrinello neural network with SOAP features - * - Alchemical Model - - A Behler-Parrinello neural network with SOAP features and Alchemical Compression - of the composition space * - PET - Point Edge Transformer (PET), interatomic machine learning potential diff --git a/docs/src/advanced-concepts/auxiliary-outputs.rst b/docs/src/advanced-concepts/auxiliary-outputs.rst index 3645a8b6c..4e872c5bf 100644 --- a/docs/src/advanced-concepts/auxiliary-outputs.rst +++ b/docs/src/advanced-concepts/auxiliary-outputs.rst @@ -31,13 +31,13 @@ by one or more architectures in the library: The following table shows the architectures that support each of the auxiliary outputs: -+--------------------------------------------+-----------+------------------+-----+-----+---------+ -| Auxiliary output | SOAP-BPNN | Alchemical Model | PET | GAP | NanoPET | -+--------------------------------------------+-----------+------------------+-----+-----+---------+ -| ``mtt::aux::{target}_last_layer_features`` | Yes | No | No | No | Yes | -+--------------------------------------------+-----------+------------------+-----+-----+---------+ -| ``features`` | Yes | No | No | No | Yes | -+--------------------------------------------+-----------+------------------+-----+-----+---------+ ++--------------------------------------------+-----------+-----+-----+---------+ +| Auxiliary output | SOAP-BPNN | PET | GAP | NanoPET | ++--------------------------------------------+-----------+-----+-----+---------+ +| ``mtt::aux::{target}_last_layer_features`` | Yes | No | No | Yes | ++--------------------------------------------+-----------+-----+-----+---------+ +| ``features`` | Yes | No | No | Yes | ++--------------------------------------------+-----------+-----+-----+---------+ The following tables show the metadata that will be provided for each of the auxiliary outputs: diff --git a/docs/src/advanced-concepts/fitting-generic-targets.rst b/docs/src/advanced-concepts/fitting-generic-targets.rst index 10e415900..5fad6ab34 100644 --- a/docs/src/advanced-concepts/fitting-generic-targets.rst +++ b/docs/src/advanced-concepts/fitting-generic-targets.rst @@ -23,11 +23,6 @@ capabilities of the architectures in metatrain. - Yes - Only with ``o3_lambda=1, o3_sigma=1`` - No - * - Alchemical Model - - Energy, forces, stress/virial - - No - - No - - No * - GAP - Energy, forces - No diff --git a/docs/src/architectures/alchemical-model.rst b/docs/src/architectures/alchemical-model.rst deleted file mode 100644 index 4b37d3514..000000000 --- a/docs/src/architectures/alchemical-model.rst +++ /dev/null @@ -1,141 +0,0 @@ -.. _architecture-alchemical-model: - -Alchemical Model -================ - -.. 
warning::
-
-    This is an **experimental model**. You should not use it for anything important.
-
-This is an implementation of the Alchemical Model: a Behler-Parrinello neural network
-:footcite:p:`behler_generalized_2007` with Smooth Overlap of Atomic Positions (SOAP)
-features :footcite:p:`bartok_representing_2013` and Alchemical Compression of the
-composition space :footcite:p:`willatt_feature_2018, lopanitsyna_modeling_2023,
-mazitov_surface_2024`. This model is particularly useful for simulating systems
-containing a large number of chemical elements.
-
-
-Installation
-------------
-To install the package, you can run the following command in the root
-directory of the repository:
-
-.. code-block:: bash
-
-    pip install .[alchemical-model]
-
-This will install the package with the Alchemical Model dependencies.
-
-
-Default Hyperparameters
------------------------
-The default hyperparameters for the Alchemical Model are:
-
-.. literalinclude:: ../../../src/metatrain/experimental/alchemical_model/default-hypers.yaml
-   :language: yaml
-
-
-Tuning Hyperparameters
-----------------------
-The default hyperparameters above will work well in most cases, but they
-may not be optimal for your specific dataset. In general, the most important
-hyperparameters to tune are (in decreasing order of importance):
-
-- ``cutoff``: This should be set to a value after which most of the
-  interactions between atoms are expected to be negligible.
-- ``num_pseudo_species``: This determines the number of pseudo species to use
-  in the Alchemical Compression of the composition space. This value should be
-  adjusted based on prior knowledge of the size of the original chemical space.
-- ``learning_rate``: The learning rate for the neural network. This hyperparameter
-  controls how much the weights of the network are updated at each step of the
-  optimization. A larger learning rate will lead to faster training, but might cause
-  instability and/or divergence.
-- ``batch_size``: The number of samples to use in each batch of training. This
-  hyperparameter controls the tradeoff between training speed and memory usage. In
-  general, larger batch sizes will lead to faster training, but might require more
-  memory.
-- ``hidden_sizes``: This hyperparameter controls the size and depth of the descriptors
-  and the neural network. In general, increasing it might lead to better accuracy,
-  especially on larger datasets, at the cost of increased training and evaluation time.
-- ``loss``: This section describes the loss function to be used. It has three
-  subsections (a combined example is sketched below):
-
-  1. ``weights``: This controls the weighting of different contributions to the
-     loss (e.g., energy, forces, virial, etc.). The default values of 1.0 for all
-     targets work well for most datasets, but they might need to be adjusted. For
-     example, to set a weight of 1.0 for the energy and 0.1 for the forces, you can
-     set the following in the ``options.yaml`` file under ``loss``:
-     ``weights: {"energy": 1.0, "forces": 0.1}``.
-  2. ``type``: This controls the type of loss to be used. The default value is
-     ``mse``, and the other options are ``mae`` and ``huber``. ``huber`` is a
-     subsection of its own, and it requires the user to specify the ``deltas``
-     parameters in a similar way to how the ``weights`` are specified (e.g.,
-     ``deltas: {"energy": 0.1, "forces": 0.01}``).
-  3. ``reduction``: This controls how the loss is reduced over batches. The default
-     value is ``sum``, and the other allowed option is ``mean``.
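-
-For concreteness, here is a minimal sketch of an ``options.yaml`` snippet combining
-the settings discussed above (illustrative values based on the defaults, not tuned
-recommendations):
-
-.. code-block:: yaml
-
-    architecture:
-      name: experimental.alchemical_model
-      model:
-        soap:
-          cutoff: 5.0
-          num_pseudo_species: 4
-      training:
-        batch_size: 8
-        learning_rate: 0.001
-        loss:
-          type: mse
-          weights: {"energy": 1.0, "forces": 0.1}
-          reduction: sum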
-
-
-Architecture Hyperparameters
-----------------------------
-:param name: ``experimental.alchemical_model``
-
-model
-#####
-soap
-^^^^
-:param num_pseudo_species: Number of pseudo species to use in the Alchemical
-    Compression of the composition space.
-:param cutoff: Spherical cutoff (Å) to use for atomic environments.
-:param basis_cutoff_power_spectrum: The maximal eigenvalue of the Laplacian
-    Eigenstates (LE) basis functions used as radial basis
-    :footcite:p:`bigi_smooth_2022`. This controls how large the radial-angular
-    basis is.
-:param radial_basis_type: The type of LE basis functions to use as the radial basis.
-    The supported radial basis functions are
-
-    - ``LE``: Original Laplacian Eigenstates radial basis. These radial basis functions
-      can be set in the ``.yaml`` file as:
-
-      .. code-block:: yaml
-
-        radial_basis_type: "le"
-
-    - ``Physical``: Physically-motivated basis functions. These radial basis functions
-      can be set in the ``.yaml`` file as:
-
-      .. code-block:: yaml
-
-        radial_basis_type: "physical"
-
-:param basis_scale: Scaling parameter of the radial basis functions, representing the
-    characteristic width (in Å) of the basis functions.
-:param trainable_basis: If :py:obj:`True`, the radial basis functions will be
-    accompanied by a trainable multi-layer perceptron (MLP). If :py:obj:`False`, the
-    radial basis functions will be fixed.
-:param normalize: Whether to use normalizations such as LayerNorm in the model.
-:param contract_center_species: If ``True``, the Alchemical Compression will be applied
-    to the center species as well. If ``False``, the Alchemical Compression will be
-    applied only to the neighbor species.
-
-
-bpnn
-^^^^
-:param hidden_sizes: number of neurons in each hidden layer
-:param output_size: number of neurons in the output layer
-
-training
-########
-The hyperparameters for training are:
-
-:param batch_size: batch size
-:param num_epochs: number of training epochs
-:param learning_rate: learning rate
-:param log_interval: number of epochs that elapse between reporting new training results
-:param checkpoint_interval: Interval to save a checkpoint to disk.
-:param per_structure_targets: Specifies the targets that should be trained with a
-    per-structure (rather than per-atom) loss. For the other targets, the logger will
-    also output per-atom metrics. In any case, the final summary will be
-    per-structure.
-
-References
-----------
-.. footbibliography::
-
diff --git a/docs/static/refs.bib b/docs/static/refs.bib
index c1c9a2c03..4150a75e0 100644
--- a/docs/static/refs.bib
+++ b/docs/static/refs.bib
@@ -42,29 +42,6 @@ @article{willatt_feature_2018
   langid = {english}
 }
 
-@article{mazitov_surface_2024,
-  author = {Mazitov, Arslan and Springer, Maximilian A. and Lopanitsyna, Nataliya and Fraux, Guillaume and De, Sandip and Ceriotti, Michele},
-  title = {Surface segregation in high-entropy alloys from alchemical machine learning},
-  journal = {Journal of Physics: Materials},
-  url = {http://iopscience.iop.org/article/10.1088/2515-7639/ad2983},
-  year = {2024},
-}
-
-@article{lopanitsyna_modeling_2023,
-  title = {Modeling high-entropy transition metal alloys with alchemical compression},
-  author = {Lopanitsyna, Nataliya and Fraux, Guillaume and Springer, Maximilian A. and De, Sandip and Ceriotti, Michele},
-  journal = {Phys. Rev.
Mater.}, - volume = {7}, - issue = {4}, - pages = {045802}, - numpages = {15}, - year = {2023}, - month = {Apr}, - publisher = {American Physical Society}, - doi = {10.1103/PhysRevMaterials.7.045802}, - url = {https://link.aps.org/doi/10.1103/PhysRevMaterials.7.045802} -} - @article{bigi_smooth_2022, author = {Bigi, Filippo and Huguenin-Dumittan, Kevin K. and Ceriotti, Michele and Manolopoulos, David E.}, title = "{A smooth basis for atomistic machine learning}", diff --git a/pyproject.toml b/pyproject.toml index 14a986768..acaeb0947 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,9 +61,6 @@ build-backend = "setuptools.build_meta" soap-bpnn = [ "rascaline-torch @ git+https://github.com/luthaf/rascaline@5326b6e#subdirectory=python/rascaline-torch", ] -alchemical-model = [ - "torch_alchemical @ git+https://github.com/abmazitov/torch_alchemical.git@51ff519", -] pet = [ "pet @ git+https://github.com/lab-cosmo/pet@5d40710", ] diff --git a/src/metatrain/experimental/alchemical_model/__init__.py b/src/metatrain/experimental/alchemical_model/__init__.py deleted file mode 100644 index 53ea53c47..000000000 --- a/src/metatrain/experimental/alchemical_model/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from .model import AlchemicalModel -from .trainer import Trainer - -__model__ = AlchemicalModel -__trainer__ = Trainer - -__authors__ = [ - ("Arslan Mazitov ", "@abmazitov"), -] - -__maintainers__ = [ - ("Arslan Mazitov ", "@abmazitov"), -] diff --git a/src/metatrain/experimental/alchemical_model/default-hypers.yaml b/src/metatrain/experimental/alchemical_model/default-hypers.yaml deleted file mode 100644 index 7bceeac34..000000000 --- a/src/metatrain/experimental/alchemical_model/default-hypers.yaml +++ /dev/null @@ -1,33 +0,0 @@ -architecture: - name: experimental.alchemical_model - - model: - soap: - num_pseudo_species: 4 - cutoff: 5.0 - basis_cutoff_power_spectrum: 400 - radial_basis_type: "physical" - basis_scale: 3.0 - trainable_basis: true - normalize: true - contract_center_species: true - bpnn: - hidden_sizes: [32, 32] - output_size: 1 - zbl: false - - training: - batch_size: 8 - num_epochs: 100 - learning_rate: 0.001 - early_stopping_patience: 200 - scheduler_patience: 100 - scheduler_factor: 0.8 - log_interval: 5 - checkpoint_interval: 25 - per_structure_targets: [] - log_mae: False - loss: - type: mse - weights: {} - reduction: sum diff --git a/src/metatrain/experimental/alchemical_model/model.py b/src/metatrain/experimental/alchemical_model/model.py deleted file mode 100644 index 746259f1d..000000000 --- a/src/metatrain/experimental/alchemical_model/model.py +++ /dev/null @@ -1,226 +0,0 @@ -from pathlib import Path -from typing import Dict, List, Optional, Union - -import metatensor.torch -import torch -from metatensor.torch import Labels, TensorBlock, TensorMap -from metatensor.torch.atomistic import ( - MetatensorAtomisticModel, - ModelCapabilities, - ModelMetadata, - ModelOutput, - NeighborListOptions, - System, -) -from torch_alchemical.models import AlchemicalModel as AlchemicalModelUpstream - -from ...utils.additive import ZBL -from ...utils.data.dataset import DatasetInfo -from ...utils.dtype import dtype_to_str -from .utils import systems_to_torch_alchemical_batch - - -class AlchemicalModel(torch.nn.Module): - - __supported_devices__ = ["cuda", "cpu"] - __supported_dtypes__ = [torch.float64, torch.float32] - - def __init__(self, model_hypers: Dict, dataset_info: DatasetInfo) -> None: - super().__init__() - self.hypers = model_hypers - self.dataset_info = dataset_info - 
self.atomic_types = dataset_info.atomic_types - - if len(dataset_info.targets) != 1: - raise ValueError("The Alchemical Model only supports a single target") - - target_name = next(iter(dataset_info.targets.keys())) - target = dataset_info.targets[target_name] - if not ( - target.is_scalar - and target.quantity == "energy" - and len(target.layout.block(0).properties) == 1 - ): - raise ValueError( - "The Alchemical Model only supports total-energy-like outputs, " - f"but a {target.quantity} was provided" - ) - if target.per_atom: - raise ValueError( - "Alchemical Model only supports per-structure outputs, " - "but a per-atom output was provided" - ) - - self.outputs = { - key: ModelOutput( - quantity=value.quantity, - unit=value.unit, - per_atom=False, - ) - for key, value in dataset_info.targets.items() - } - - self.alchemical_model = AlchemicalModelUpstream( - unique_numbers=self.atomic_types, - **self.hypers["soap"], - **self.hypers["bpnn"], - ) - - additive_models = [] - if self.hypers["zbl"]: - additive_models.append(ZBL(model_hypers, dataset_info)) - self.additive_models = torch.nn.ModuleList(additive_models) - - self.cutoff = self.hypers["soap"]["cutoff"] - self.is_restarted = False - - def restart(self, dataset_info: DatasetInfo) -> "AlchemicalModel": - if dataset_info != self.dataset_info: - raise ValueError( - "Alchemical model cannot be restarted with different " - "dataset information" - ) - self.is_restarted = True - return self - - def requested_neighbor_lists(self) -> List[NeighborListOptions]: - return [ - NeighborListOptions( - cutoff=self.cutoff, - full_list=True, - strict=True, - ) - ] - - def forward( - self, - systems: List[System], - outputs: Dict[str, ModelOutput], - selected_atoms: Optional[Labels] = None, - ) -> Dict[str, TensorMap]: - assert len(outputs.keys()) == 1 - output_name = list(outputs.keys())[0] - - if selected_atoms is not None: - raise NotImplementedError( - "Alchemical Model does not support selected atoms." 
- ) - options = self.requested_neighbor_lists()[0] - batch = systems_to_torch_alchemical_batch(systems, options) - predictions = self.alchemical_model( - positions=batch["positions"], - cells=batch["cells"], - numbers=batch["numbers"], - edge_indices=batch["edge_indices"], - edge_offsets=batch["edge_offsets"], - batch=batch["batch"], - ) - - total_energies: Dict[str, TensorMap] = {} - keys = Labels( - "_", torch.zeros((1, 1), dtype=torch.int32, device=predictions.device) - ) - properties = Labels( - "energy", - torch.zeros((1, 1), dtype=torch.int32, device=predictions.device), - ) - samples = Labels( - names=["system"], - values=torch.arange( - len(predictions), - device=predictions.device, - ).view(-1, 1), - ) - block = TensorBlock( - samples=samples, - components=[], - properties=properties, - values=predictions, - ) - total_energies[output_name] = TensorMap( - keys=keys, - blocks=[block], - ) - - if not self.training: - # at evaluation, we also add the additive contributions - for additive_model in self.additive_models: - additive_contributions = additive_model( - systems, outputs, selected_atoms - ) - total_energies[output_name] = metatensor.torch.add( - total_energies[output_name], - additive_contributions[output_name], - ) - - return total_energies - - @classmethod - def load_checkpoint(cls, path: Union[str, Path]) -> "AlchemicalModel": - - # Load the checkpoint - checkpoint = torch.load(path, weights_only=False, map_location="cpu") - model_hypers = checkpoint["model_hypers"] - model_state_dict = checkpoint["model_state_dict"] - - # Create the model - model = cls(**model_hypers) - dtype = next(iter(model_state_dict.values())).dtype - model.to(dtype).load_state_dict(model_state_dict) - - return model - - def export(self) -> MetatensorAtomisticModel: - dtype = next(self.parameters()).dtype - if dtype not in self.__supported_dtypes__: - raise ValueError(f"unsupported dtype {dtype} for AlchemicalModel") - - # Make sure the model is all in the same dtype - # For example, after training, the additive models could still be in - # float64 - self.to(dtype) - - interaction_ranges = [self.hypers["soap"]["cutoff"]] - for additive_model in self.additive_models: - if hasattr(additive_model, "cutoff_radius"): - interaction_ranges.append(additive_model.cutoff_radius) - interaction_range = max(interaction_ranges) - - capabilities = ModelCapabilities( - outputs=self.outputs, - atomic_types=self.atomic_types, - interaction_range=interaction_range, - length_unit=self.dataset_info.length_unit, - supported_devices=self.__supported_devices__, - dtype=dtype_to_str(dtype), - ) - - return MetatensorAtomisticModel(self.eval(), ModelMetadata(), capabilities) - - def set_composition_weights( - self, - input_composition_weights: torch.Tensor, - atomic_types: List[int], - ) -> None: - """Set the composition weights for a given output.""" - input_composition_weights = input_composition_weights.to( - dtype=self.alchemical_model.composition_weights.dtype, - device=self.alchemical_model.composition_weights.device, - ) - index = [self.atomic_types.index(s) for s in atomic_types] - composition_weights = input_composition_weights[:, index] - self.alchemical_model.set_composition_weights(composition_weights) - - def set_normalization_factor(self, normalization_factor: torch.Tensor) -> None: - """Set the normalization factor for output of the model.""" - self.alchemical_model.set_normalization_factor(normalization_factor) - - def set_basis_normalization_factor( - self, basis_normalization_factor: torch.Tensor - ) -> None: 
- """Set the normalization factor for the basis functions of the model.""" - self.alchemical_model.set_basis_normalization_factor(basis_normalization_factor) - - def set_energies_scale_factor(self, energies_scale_factor: torch.Tensor) -> None: - """Set the energies scale factor for the model.""" - self.alchemical_model.set_energies_scale_factor(energies_scale_factor) diff --git a/src/metatrain/experimental/alchemical_model/schema-hypers.json b/src/metatrain/experimental/alchemical_model/schema-hypers.json deleted file mode 100644 index 957044786..000000000 --- a/src/metatrain/experimental/alchemical_model/schema-hypers.json +++ /dev/null @@ -1,153 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "name": { - "type": "string", - "enum": ["experimental.alchemical_model"] - }, - "model": { - "type": "object", - "properties": { - "soap": { - "type": "object", - "properties": { - "num_pseudo_species": { - "type": "integer" - }, - "cutoff": { - "type": "number" - }, - "basis_cutoff_power_spectrum": { - "type": "integer" - }, - "radial_basis_type": { - "type": "string" - }, - "basis_scale": { - "type": "number" - }, - "trainable_basis": { - "type": "boolean" - }, - "normalize": { - "type": "boolean" - }, - "contract_center_species": { - "type": "boolean" - } - }, - "additionalProperties": false - }, - "bpnn": { - "type": "object", - "properties": { - "hidden_sizes": { - "type": "array", - "items": { - "type": "integer" - } - }, - "output_size": { - "type": "integer" - } - }, - "additionalProperties": false - }, - "zbl": { - "type": "boolean" - } - }, - "additionalProperties": false - }, - "training": { - "type": "object", - "properties": { - "batch_size": { - "type": "integer" - }, - "num_epochs": { - "type": "integer" - }, - "learning_rate": { - "type": "number" - }, - "early_stopping_patience": { - "type": "integer" - }, - "scheduler_patience": { - "type": "integer" - }, - "scheduler_factor": { - "type": "number" - }, - "log_interval": { - "type": "integer" - }, - "checkpoint_interval": { - "type": "integer" - }, - "per_structure_targets": { - "type": "array", - "items": { - "type": "string" - } - }, - "log_mae": { - "type": "boolean" - }, - "loss": { - "type": "object", - "properties": { - "weights": { - "type": "object", - "patternProperties": { - ".*": { - "type": "number" - } - }, - "additionalProperties": false - }, - "reduction": { - "type": "string", - "enum": ["sum", "mean", "none"] - }, - "type": { - "oneOf": [ - { - "type": "string", - "enum": ["mse", "mae"] - }, - { - "type": "object", - "properties": { - "huber": { - "type": "object", - "properties": { - "deltas": { - "type": "object", - "patternProperties": { - ".*": { - "type": "number" - } - }, - "additionalProperties": false - } - }, - "required": ["deltas"], - "additionalProperties": false - } - }, - "additionalProperties": false - } - ] - } - }, - "additionalProperties": false - } - }, - "additionalProperties": false - } - }, - "additionalProperties": false -} diff --git a/src/metatrain/experimental/alchemical_model/tests/__init__.py b/src/metatrain/experimental/alchemical_model/tests/__init__.py deleted file mode 100644 index 4fcc1d3f5..000000000 --- a/src/metatrain/experimental/alchemical_model/tests/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from pathlib import Path -from metatrain.utils.architectures import get_default_hypers - - -DATASET_PATH = str(Path(__file__).parents[5] / "tests/resources/qm9_reduced_100.xyz") - -QM9_DATASET_PATH = str( - 
Path(__file__).parents[5] / "tests/resources/qm9_reduced_100.xyz" -) - -DEFAULT_HYPERS = get_default_hypers("experimental.alchemical_model") -MODEL_HYPERS = DEFAULT_HYPERS["model"] diff --git a/src/metatrain/experimental/alchemical_model/tests/test_exported.py b/src/metatrain/experimental/alchemical_model/tests/test_exported.py deleted file mode 100644 index 3a900b57b..000000000 --- a/src/metatrain/experimental/alchemical_model/tests/test_exported.py +++ /dev/null @@ -1,48 +0,0 @@ -import pytest -import torch -from metatensor.torch.atomistic import ModelEvaluationOptions, System - -from metatrain.experimental.alchemical_model import AlchemicalModel -from metatrain.utils.data import DatasetInfo -from metatrain.utils.data.target_info import get_energy_target_info -from metatrain.utils.neighbor_lists import ( - get_requested_neighbor_lists, - get_system_with_neighbor_lists, -) - -from . import MODEL_HYPERS - - -@pytest.mark.parametrize("device", ["cpu", "cuda"]) -@pytest.mark.parametrize("dtype", [torch.float32, torch.float64]) -def test_to(device, dtype): - """Tests that the `.to()` method of the exported model works.""" - if device == "cuda" and not torch.cuda.is_available(): - pytest.skip("CUDA is not available") - - dataset_info = DatasetInfo( - length_unit="Angstrom", - atomic_types=[1, 6, 7, 8], - targets={"energy": get_energy_target_info({"unit": "eV"})}, - ) - model = AlchemicalModel(MODEL_HYPERS, dataset_info).to(dtype=dtype) - exported = model.export() - - exported.to(device=device) - - system = System( - types=torch.tensor([6, 6]), - positions=torch.tensor([[0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]), - cell=torch.zeros(3, 3), - pbc=torch.tensor([False, False, False]), - ) - requested_neighbor_lists = get_requested_neighbor_lists(exported) - system = get_system_with_neighbor_lists(system, requested_neighbor_lists) - system = system.to(device=device, dtype=dtype) - - evaluation_options = ModelEvaluationOptions( - length_unit=dataset_info.length_unit, - outputs=model.outputs, - ) - - exported([system], evaluation_options, check_consistency=True) diff --git a/src/metatrain/experimental/alchemical_model/tests/test_functionality.py b/src/metatrain/experimental/alchemical_model/tests/test_functionality.py deleted file mode 100644 index 5bb185629..000000000 --- a/src/metatrain/experimental/alchemical_model/tests/test_functionality.py +++ /dev/null @@ -1,74 +0,0 @@ -import pytest -import torch -from metatensor.torch.atomistic import ModelEvaluationOptions, System - -from metatrain.experimental.alchemical_model import AlchemicalModel -from metatrain.utils.data import DatasetInfo -from metatrain.utils.data.target_info import ( - get_energy_target_info, - get_generic_target_info, -) -from metatrain.utils.neighbor_lists import ( - get_requested_neighbor_lists, - get_system_with_neighbor_lists, -) - -from . 
import MODEL_HYPERS - - -def test_prediction_subset_elements(): - """Tests that the model can predict on a subset of the elements it was trained - on.""" - - dataset_info = DatasetInfo( - length_unit="Angstrom", - atomic_types=[1, 6, 7, 8], - targets={"energy": get_energy_target_info({"unit": "eV"})}, - ) - - model = AlchemicalModel(MODEL_HYPERS, dataset_info) - - system = System( - types=torch.tensor([6, 6]), - positions=torch.tensor([[0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]), - cell=torch.zeros(3, 3), - pbc=torch.tensor([False, False, False]), - ) - requested_neighbor_lists = get_requested_neighbor_lists(model) - system = get_system_with_neighbor_lists(system, requested_neighbor_lists) - - evaluation_options = ModelEvaluationOptions( - length_unit=dataset_info.length_unit, - outputs=model.outputs, - ) - - exported = model.export() - exported([system], evaluation_options, check_consistency=True) - - -@pytest.mark.parametrize("per_atom", [True, False]) -def test_vector_output(per_atom): - """Tests that the model can predict a (spherical) vector output.""" - - dataset_info = DatasetInfo( - length_unit="Angstrom", - atomic_types=[1, 6, 7, 8], - targets={ - "forces": get_generic_target_info( - { - "quantity": "forces", - "unit": "", - "type": { - "spherical": {"irreps": [{"o3_lambda": 1, "o3_sigma": 1}]} - }, - "num_subtargets": 100, - "per_atom": per_atom, - } - ) - }, - ) - - with pytest.raises( - ValueError, match="The Alchemical Model only supports total-energy-like outputs" - ): - AlchemicalModel(MODEL_HYPERS, dataset_info) diff --git a/src/metatrain/experimental/alchemical_model/tests/test_invariance.py b/src/metatrain/experimental/alchemical_model/tests/test_invariance.py deleted file mode 100644 index 601604f0a..000000000 --- a/src/metatrain/experimental/alchemical_model/tests/test_invariance.py +++ /dev/null @@ -1,58 +0,0 @@ -import copy - -import ase.io -import torch -from metatensor.torch.atomistic import ModelEvaluationOptions, systems_to_torch - -from metatrain.experimental.alchemical_model import AlchemicalModel -from metatrain.utils.data import DatasetInfo -from metatrain.utils.data.target_info import get_energy_target_info -from metatrain.utils.neighbor_lists import ( - get_requested_neighbor_lists, - get_system_with_neighbor_lists, -) - -from . 
import DATASET_PATH, MODEL_HYPERS - - -def test_rotational_invariance(): - """Tests that the model is rotationally invariant.""" - - dataset_info = DatasetInfo( - length_unit="Angstrom", - atomic_types=[1, 6, 7, 8], - targets={"energy": get_energy_target_info({"unit": "eV"})}, - ) - model = AlchemicalModel(MODEL_HYPERS, dataset_info) - - system = ase.io.read(DATASET_PATH) - original_system = copy.deepcopy(system) - original_system = systems_to_torch(original_system) - requested_neighbor_lists = get_requested_neighbor_lists(model) - original_system = get_system_with_neighbor_lists( - original_system, requested_neighbor_lists - ) - - system.rotate(48, "y") - system = systems_to_torch(system) - requested_neighbor_lists = get_requested_neighbor_lists(model) - system = get_system_with_neighbor_lists(system, requested_neighbor_lists) - - evaluation_options = ModelEvaluationOptions( - length_unit=dataset_info.length_unit, - outputs=model.outputs, - ) - - exported = model.export() - - original_output = exported( - [original_system], - evaluation_options, - check_consistency=True, - ) - rotated_output = exported([system], evaluation_options, check_consistency=True) - - torch.testing.assert_close( - original_output["energy"].block().values, - rotated_output["energy"].block().values, - ) diff --git a/src/metatrain/experimental/alchemical_model/tests/test_regression.py b/src/metatrain/experimental/alchemical_model/tests/test_regression.py deleted file mode 100644 index ab94cf2c6..000000000 --- a/src/metatrain/experimental/alchemical_model/tests/test_regression.py +++ /dev/null @@ -1,150 +0,0 @@ -import random - -import numpy as np -import torch -from metatensor.torch.atomistic import ModelEvaluationOptions -from omegaconf import OmegaConf - -from metatrain.experimental.alchemical_model import AlchemicalModel, Trainer -from metatrain.utils.data import Dataset, DatasetInfo, read_systems, read_targets -from metatrain.utils.data.target_info import get_energy_target_info -from metatrain.utils.neighbor_lists import ( - get_requested_neighbor_lists, - get_system_with_neighbor_lists, -) - -from . 
import DATASET_PATH, DEFAULT_HYPERS, MODEL_HYPERS - - -# reproducibility -random.seed(0) -np.random.seed(0) -torch.manual_seed(0) - - -def test_regression_init(): - """Perform a regression test on the model at initialization""" - - targets = {} - targets["mtt::U0"] = get_energy_target_info({"unit": "eV"}) - - dataset_info = DatasetInfo( - length_unit="Angstrom", atomic_types=[1, 6, 7, 8], targets=targets - ) - model = AlchemicalModel(MODEL_HYPERS, dataset_info) - - # Predict on the first five systems - systems = read_systems(DATASET_PATH)[:5] - requested_neighbor_lists = get_requested_neighbor_lists(model) - systems = [ - get_system_with_neighbor_lists(system, requested_neighbor_lists) - for system in systems - ] - - evaluation_options = ModelEvaluationOptions( - length_unit=model.dataset_info.length_unit, - outputs=model.outputs, - ) - - exported = model.export() - - systems = [system.to(dtype=torch.float32) for system in systems] - output = exported(systems, evaluation_options, check_consistency=True) - - expected_output = torch.tensor( - [ - [-11.203639984131], - [4.095238208771], - [-4.632149219513], - [-13.758152008057], - [-2.430717945099], - ], - ) - - # if you need to change the hardcoded values: - # torch.set_printoptions(precision=12) - # print(output["mtt::U0"].block().values) - - torch.testing.assert_close( - output["mtt::U0"].block().values, - expected_output, - ) - - -def test_regression_train(): - """Perform a regression test on the model when - trained for 2 epoch on a small dataset""" - - systems = read_systems(DATASET_PATH) - - conf = { - "mtt::U0": { - "quantity": "energy", - "read_from": DATASET_PATH, - "reader": "ase", - "key": "U0", - "unit": "eV", - "type": "scalar", - "per_atom": False, - "num_subtargets": 1, - "forces": False, - "stress": False, - "virial": False, - } - } - targets, target_info_dict = read_targets(OmegaConf.create(conf)) - dataset = Dataset.from_dict({"system": systems, "mtt::U0": targets["mtt::U0"]}) - - hypers = DEFAULT_HYPERS.copy() - - dataset_info = DatasetInfo( - length_unit="Angstrom", atomic_types=[1, 6, 7, 8], targets=target_info_dict - ) - model = AlchemicalModel(MODEL_HYPERS, dataset_info) - - requested_neighbor_lists = get_requested_neighbor_lists(model) - systems = [ - get_system_with_neighbor_lists(system, requested_neighbor_lists) - for system in systems - ] - - hypers["training"]["num_epochs"] = 1 - trainer = Trainer(hypers["training"]) - trainer.train( - model=model, - dtype=torch.float32, - devices=[torch.device("cpu")], - train_datasets=[dataset], - val_datasets=[dataset], - checkpoint_dir=".", - ) - - # Predict on the first five systems - evaluation_options = ModelEvaluationOptions( - length_unit=dataset_info.length_unit, - outputs=model.outputs, - ) - - exported = model.export() - - systems = [system.to(dtype=torch.float32) for system in systems] - output = exported(systems[:5], evaluation_options, check_consistency=True) - - expected_output = torch.tensor( - [ - [-40.115474700928], - [-56.302265167236], - [-76.722442626953], - [-77.022941589355], - [-92.791801452637], - ], - ) - - # if you need to change the hardcoded values: - # torch.set_printoptions(precision=12) - # print(output["mtt::U0"].block().values) - - torch.testing.assert_close( - output["mtt::U0"].block().values, - expected_output, - ) diff --git a/src/metatrain/experimental/alchemical_model/tests/test_torch_alchemical_compatibility.py b/src/metatrain/experimental/alchemical_model/tests/test_torch_alchemical_compatibility.py deleted file mode 100644 index 
f9ceee10e..000000000 --- a/src/metatrain/experimental/alchemical_model/tests/test_torch_alchemical_compatibility.py +++ /dev/null @@ -1,106 +0,0 @@ -import random - -import numpy as np -import torch -from ase.io import read -from metatensor.torch.atomistic import ModelEvaluationOptions, NeighborListOptions -from torch_alchemical.data import AtomisticDataset -from torch_alchemical.models import AlchemicalModel as AlchemicalModelUpstream -from torch_alchemical.transforms import NeighborList -from torch_alchemical.utils import get_list_of_unique_atomic_numbers -from torch_geometric.loader import DataLoader - -from metatrain.experimental.alchemical_model import AlchemicalModel -from metatrain.experimental.alchemical_model.utils import ( - systems_to_torch_alchemical_batch, -) -from metatrain.utils.data import DatasetInfo, read_systems -from metatrain.utils.data.target_info import get_energy_target_info -from metatrain.utils.neighbor_lists import get_system_with_neighbor_lists - -from . import MODEL_HYPERS, QM9_DATASET_PATH - - -random.seed(0) -np.random.seed(0) -torch.manual_seed(0) - -systems = read_systems(QM9_DATASET_PATH) -systems = [system.to(torch.float32) for system in systems] -nl_options = NeighborListOptions( - cutoff=5.0, - full_list=True, - strict=True, -) -systems = [get_system_with_neighbor_lists(system, [nl_options]) for system in systems] - -frames = read(QM9_DATASET_PATH, ":") -dataset = AtomisticDataset( - frames, - target_properties=["energies", "forces"], - transforms=[NeighborList(cutoff_radius=5.0)], -) -dataloader = DataLoader(dataset, batch_size=len(dataset), shuffle=False) -batch = next(iter(dataloader)) - - -def test_systems_to_torch_alchemical_batch(): - batch_dict = systems_to_torch_alchemical_batch(systems, nl_options) - print(batch_dict["positions"].dtype, batch.pos.dtype) - torch.testing.assert_close(batch_dict["positions"], batch.pos) - torch.testing.assert_close(batch_dict["cells"], batch.cell) - torch.testing.assert_close(batch_dict["numbers"], batch.numbers) - - index_1, counts_1 = torch.unique(batch_dict["batch"], return_counts=True) - index_2, counts_2 = torch.unique(batch.batch, return_counts=True) - torch.testing.assert_close(index_1, index_2) - torch.testing.assert_close(counts_1, counts_2) - - offset_1, counts_1 = torch.unique(batch_dict["edge_offsets"], return_counts=True) - offset_2, counts_2 = torch.unique(batch.edge_offsets, return_counts=True) - torch.testing.assert_close(offset_1, offset_2) - torch.testing.assert_close(counts_1, counts_2) - torch.testing.assert_close(batch_dict["batch"], batch.batch) - - -def test_alchemical_model_inference(): - random.seed(0) - np.random.seed(0) - torch.manual_seed(0) - unique_numbers = get_list_of_unique_atomic_numbers(frames) - - dataset_info = DatasetInfo( - length_unit="Angstrom", - atomic_types=unique_numbers, - targets={"energy": get_energy_target_info({"unit": "eV"})}, - ) - - alchemical_model = AlchemicalModel(MODEL_HYPERS, dataset_info) - - evaluation_options = ModelEvaluationOptions( - length_unit=dataset_info.length_unit, - outputs=alchemical_model.outputs, - ) - - exported = alchemical_model.export() - - output = exported(systems, evaluation_options, check_consistency=True) - - random.seed(0) - np.random.seed(0) - torch.manual_seed(0) - - original_model = AlchemicalModelUpstream( - unique_numbers=unique_numbers, - **MODEL_HYPERS["soap"], - **MODEL_HYPERS["bpnn"], - ).eval() - original_output = original_model( - positions=batch.pos, - cells=batch.cell, - numbers=batch.numbers, - 
edge_indices=batch.edge_index, - edge_offsets=batch.edge_offsets, - batch=batch.batch, - ) - torch.testing.assert_close(output["energy"].block().values, original_output) diff --git a/src/metatrain/experimental/alchemical_model/tests/test_torchscript.py b/src/metatrain/experimental/alchemical_model/tests/test_torchscript.py deleted file mode 100644 index fbac2fe6b..000000000 --- a/src/metatrain/experimental/alchemical_model/tests/test_torchscript.py +++ /dev/null @@ -1,40 +0,0 @@ -import torch - -from metatrain.experimental.alchemical_model import AlchemicalModel -from metatrain.utils.data import DatasetInfo -from metatrain.utils.data.target_info import get_energy_target_info - -from . import MODEL_HYPERS - - -def test_torchscript(): - """Tests that the model can be jitted.""" - - dataset_info = DatasetInfo( - length_unit="Angstrom", - atomic_types=[1, 6, 7, 8], - targets={"energy": get_energy_target_info({"unit": "eV"})}, - ) - - model = AlchemicalModel(MODEL_HYPERS, dataset_info) - torch.jit.script(model, {"energy": model.outputs["energy"]}) - - -def test_torchscript_save_load(): - """Tests that the model can be jitted and saved.""" - - dataset_info = DatasetInfo( - length_unit="Angstrom", - atomic_types=[1, 6, 7, 8], - targets={"energy": get_energy_target_info({"unit": "eV"})}, - ) - model = AlchemicalModel(MODEL_HYPERS, dataset_info) - torch.jit.save( - torch.jit.script( - model, - {"energy": model.outputs["energy"]}, - ), - "alchemical_model.pt", - ) - - torch.jit.load("alchemical_model.pt") diff --git a/src/metatrain/experimental/alchemical_model/trainer.py b/src/metatrain/experimental/alchemical_model/trainer.py deleted file mode 100644 index db1caa825..000000000 --- a/src/metatrain/experimental/alchemical_model/trainer.py +++ /dev/null @@ -1,411 +0,0 @@ -import copy -import logging -from pathlib import Path -from typing import List, Union - -import torch -from metatensor.learn.data import DataLoader - -from ...utils.additive import remove_additive -from ...utils.data import ( - CombinedDataLoader, - Dataset, - check_datasets, - collate_fn, - get_all_targets, -) -from ...utils.evaluate_model import evaluate_model -from ...utils.external_naming import to_external_name -from ...utils.io import check_file_extension -from ...utils.logging import MetricLogger -from ...utils.loss import TensorMapDictLoss -from ...utils.metrics import MAEAccumulator, RMSEAccumulator -from ...utils.neighbor_lists import ( - get_requested_neighbor_lists, - get_system_with_neighbor_lists, -) -from ...utils.per_atom import average_by_num_atoms -from ...utils.transfer import ( - systems_and_targets_to_device, - systems_and_targets_to_dtype, -) -from . 
import AlchemicalModel
-from .utils.composition import calculate_composition_weights
-from .utils.normalize import (
-    get_average_number_of_atoms,
-    get_average_number_of_neighbors,
-    remove_composition_from_dataset,
-)
-
-
-logger = logging.getLogger(__name__)
-
-
-class Trainer:
-    def __init__(self, train_hypers):
-        self.hypers = train_hypers
-        self.optimizer_state_dict = None
-        self.scheduler_state_dict = None
-        self.epoch = None
-
-    def train(
-        self,
-        model: AlchemicalModel,
-        dtype: torch.dtype,
-        devices: List[torch.device],
-        train_datasets: List[Union[Dataset, torch.utils.data.Subset]],
-        val_datasets: List[Union[Dataset, torch.utils.data.Subset]],
-        checkpoint_dir: str,
-    ):
-        assert dtype in AlchemicalModel.__supported_dtypes__
-
-        device = devices[0]  # only one device, as we don't support multi-gpu for now
-
-        if len(model.dataset_info.targets) != 1:
-            raise ValueError("The Alchemical Model only supports a single target")
-        target_name = next(iter(model.dataset_info.targets.keys()))
-        if model.dataset_info.targets[target_name].quantity != "energy":
-            raise ValueError("The Alchemical Model only supports energies as target")
-        if model.dataset_info.targets[target_name].per_atom:
-            raise ValueError("The Alchemical Model does not support per-atom training")
-
-        # Perform canonical checks on the datasets:
-        logger.info("Checking datasets for consistency")
-        check_datasets(train_datasets, val_datasets)
-
-        # Calculating the neighbor lists for the training and validation datasets:
-        logger.info("Calculating neighbor lists for the datasets")
-        requested_neighbor_lists = get_requested_neighbor_lists(model)
-        for dataset in train_datasets + val_datasets:
-            for i in range(len(dataset)):
-                system = dataset[i]["system"]
-                # The following line attaches the neighbor lists to the system,
-                # and doesn't require reassigning the system to the dataset:
-                _ = get_system_with_neighbor_lists(system, requested_neighbor_lists)
-
-        # Calculate the average number of atoms and neighbors in the training datasets:
-        average_number_of_atoms = get_average_number_of_atoms(train_datasets)
-        average_number_of_neighbors = get_average_number_of_neighbors(train_datasets)
-
-        # Given that currently multiple datasets are not supported, we can assume that:
-        average_number_of_atoms = average_number_of_atoms[0]
-        average_number_of_neighbors = average_number_of_neighbors[0]
-
-        # Set the normalization factors for the model:
-        model.set_normalization_factor(average_number_of_atoms)
-        model.set_basis_normalization_factor(average_number_of_neighbors)
-
-        logger.info(f"Training on device {device} with dtype {dtype}")
-        model.to(device=device, dtype=dtype)
-        # The additive models of the Alchemical Model are always in float64 (to avoid
-        # numerical errors in the composition weights, which can be very large).
-        for additive_model in model.additive_models:
-            additive_model.to(dtype=torch.float64)
-
-        # Calculate and set the composition weights, but only if
-        # this is the first training run:
-        if not model.is_restarted:
-            for target_name in model.outputs.keys():
-                train_datasets_with_target = []
-                for dataset in train_datasets:
-                    if target_name in get_all_targets(dataset):
-                        train_datasets_with_target.append(dataset)
-                if len(train_datasets_with_target) == 0:
-                    raise ValueError(
-                        f"Target {target_name} in the model's new capabilities is not "
-                        "present in any of the training datasets."
- ) - composition_weights, composition_types = calculate_composition_weights( - train_datasets_with_target, target_name - ) - model.set_composition_weights( - composition_weights.unsqueeze(0), composition_types - ) - - # Remove the composition from the datasets: - train_datasets = [ - remove_composition_from_dataset( - train_datasets[0], - model.atomic_types, - model.alchemical_model.composition_weights.squeeze(0), - ) - ] - val_datasets = [ - remove_composition_from_dataset( - val_datasets[0], - model.atomic_types, - model.alchemical_model.composition_weights.squeeze(0), - ) - ] - - logger.info("Setting up data loaders") - - # Create dataloader for the training datasets: - train_dataloaders = [] - for dataset in train_datasets: - train_dataloaders.append( - DataLoader( - dataset=dataset, - batch_size=self.hypers["batch_size"], - shuffle=True, - collate_fn=collate_fn, - ) - ) - train_dataloader = CombinedDataLoader(train_dataloaders, shuffle=True) - - # Create dataloader for the validation datasets: - val_dataloaders = [] - for dataset in val_datasets: - val_dataloaders.append( - DataLoader( - dataset=dataset, - batch_size=self.hypers["batch_size"], - shuffle=False, - collate_fn=collate_fn, - ) - ) - val_dataloader = CombinedDataLoader(val_dataloaders, shuffle=False) - - # Extract all the possible outputs and their gradients: - outputs_list = [] - for target_name, target_info in model.dataset_info.targets.items(): - outputs_list.append(target_name) - for gradient_name in target_info.gradients: - outputs_list.append(f"{target_name}_{gradient_name}_gradients") - # Create a loss weight dict: - loss_weights_dict = {} - for output_name in outputs_list: - loss_weights_dict[output_name] = ( - self.hypers["loss"]["weights"][ - to_external_name(output_name, model.outputs) - ] - if to_external_name(output_name, model.outputs) - in self.hypers["loss"]["weights"] - else 1.0 - ) - loss_weights_dict_external = { - to_external_name(key, model.outputs): value - for key, value in loss_weights_dict.items() - } - # Update the loss weights in the hypers: - loss_hypers = copy.deepcopy(self.hypers["loss"]) - loss_hypers["weights"] = loss_weights_dict - logging.info(f"Training with loss weights: {loss_weights_dict_external}") - - # Create a loss function: - loss_fn = TensorMapDictLoss( - **loss_hypers, - ) - - # Create an optimizer: - optimizer = torch.optim.Adam( - model.parameters(), lr=self.hypers["learning_rate"] - ) - if self.optimizer_state_dict is not None: - optimizer.load_state_dict(self.optimizer_state_dict) - - # Create a scheduler: - lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( - optimizer, - mode="min", - factor=self.hypers["scheduler_factor"], - patience=self.hypers["scheduler_patience"], - ) - if self.scheduler_state_dict is not None: - lr_scheduler.load_state_dict(self.scheduler_state_dict) - - # counters for early stopping: - best_val_loss = float("inf") - epochs_without_improvement = 0 - - # per-atom targets: - per_structure_targets = self.hypers["per_structure_targets"] - - # Log the initial learning rate: - old_lr = optimizer.param_groups[0]["lr"] - logger.info(f"Initial learning rate: {old_lr}") - - start_epoch = 0 if self.epoch is None else self.epoch + 1 - - # Train the model: - logger.info("Starting training") - for epoch in range(start_epoch, start_epoch + self.hypers["num_epochs"]): - train_rmse_calculator = RMSEAccumulator() - val_rmse_calculator = RMSEAccumulator() - if self.hypers["log_mae"]: - train_mae_calculator = MAEAccumulator() - val_mae_calculator = 
MAEAccumulator() - - train_loss = 0.0 - for batch in train_dataloader: - optimizer.zero_grad() - - systems, targets = batch - systems, targets = systems_and_targets_to_device( - systems, targets, device - ) - for additive_model in model.additive_models: - targets = remove_additive( - systems, targets, additive_model, model.dataset_info.targets - ) - systems, targets = systems_and_targets_to_dtype(systems, targets, dtype) - predictions = evaluate_model( - model, - systems, - {key: model.dataset_info.targets[key] for key in targets.keys()}, - is_training=True, - ) - - # average by the number of atoms - predictions = average_by_num_atoms( - predictions, systems, per_structure_targets - ) - targets = average_by_num_atoms(targets, systems, per_structure_targets) - - train_loss_batch = loss_fn(predictions, targets) - train_loss += train_loss_batch.item() - train_loss_batch.backward() - optimizer.step() - train_rmse_calculator.update(predictions, targets) - if self.hypers["log_mae"]: - train_mae_calculator.update(predictions, targets) - - finalized_train_info = train_rmse_calculator.finalize( - not_per_atom=["positions_gradients"] + per_structure_targets - ) - if self.hypers["log_mae"]: - finalized_train_info.update( - train_mae_calculator.finalize( - not_per_atom=["positions_gradients"] + per_structure_targets - ) - ) - - val_loss = 0.0 - for batch in val_dataloader: - systems, targets = batch - assert len(systems[0].known_neighbor_lists()) > 0 - systems, targets = systems_and_targets_to_device( - systems, targets, device - ) - for additive_model in model.additive_models: - targets = remove_additive( - systems, targets, additive_model, model.dataset_info.targets - ) - systems, targets = systems_and_targets_to_dtype(systems, targets, dtype) - predictions = evaluate_model( - model, - systems, - {key: model.dataset_info.targets[key] for key in targets.keys()}, - is_training=False, - ) - - # average by the number of atoms - predictions = average_by_num_atoms( - predictions, systems, per_structure_targets - ) - targets = average_by_num_atoms(targets, systems, per_structure_targets) - - val_loss_batch = loss_fn(predictions, targets) - val_loss += val_loss_batch.item() - val_rmse_calculator.update(predictions, targets) - if self.hypers["log_mae"]: - val_mae_calculator.update(predictions, targets) - - finalized_val_info = val_rmse_calculator.finalize( - not_per_atom=["positions_gradients"] + per_structure_targets - ) - if self.hypers["log_mae"]: - finalized_val_info.update( - val_mae_calculator.finalize( - not_per_atom=["positions_gradients"] + per_structure_targets - ) - ) - - # Now we log the information: - finalized_train_info = {"loss": train_loss, **finalized_train_info} - finalized_val_info = { - "loss": val_loss, - **finalized_val_info, - } - - if epoch == start_epoch: - metric_logger = MetricLogger( - log_obj=logger, - dataset_info=model.dataset_info, - initial_metrics=[finalized_train_info, finalized_val_info], - names=["training", "validation"], - ) - if epoch % self.hypers["log_interval"] == 0: - metric_logger.log( - metrics=[finalized_train_info, finalized_val_info], - epoch=epoch, - ) - - lr_scheduler.step(val_loss) - new_lr = lr_scheduler.get_last_lr()[0] - if new_lr != old_lr: - logger.info(f"Changing learning rate from {old_lr} to {new_lr}") - old_lr = new_lr - - if epoch % self.hypers["checkpoint_interval"] == 0: - self.optimizer_state_dict = optimizer.state_dict() - self.scheduler_state_dict = lr_scheduler.state_dict() - self.epoch = epoch - self.save_checkpoint( - model, 
Path(checkpoint_dir) / f"model_{epoch}.ckpt"
-                )
-
-            # early stopping criterion:
-            if val_loss < best_val_loss:
-                best_val_loss = val_loss
-                epochs_without_improvement = 0
-            else:
-                epochs_without_improvement += 1
-                if epochs_without_improvement >= self.hypers["early_stopping_patience"]:
-                    logger.info(
-                        "Early stopping criterion reached after "
-                        f"{self.hypers['early_stopping_patience']} epochs "
-                        "without improvement."
-                    )
-                    break
-
-    def save_checkpoint(self, model, path: Union[str, Path]):
-        checkpoint = {
-            "architecture_name": "experimental.alchemical_model",
-            "model_hypers": {
-                "model_hypers": model.hypers,
-                "dataset_info": model.dataset_info,
-            },
-            "model_state_dict": model.state_dict(),
-            "train_hypers": self.hypers,
-            "epoch": self.epoch,
-            "optimizer_state_dict": self.optimizer_state_dict,
-            "scheduler_state_dict": self.scheduler_state_dict,
-        }
-        torch.save(
-            checkpoint,
-            check_file_extension(path, ".ckpt"),
-        )
-
-    @classmethod
-    def load_checkpoint(cls, path: Union[str, Path], train_hypers) -> "Trainer":
-
-        # Load the checkpoint
-        checkpoint = torch.load(path, weights_only=False, map_location="cpu")
-        model_hypers = checkpoint["model_hypers"]
-        model_state_dict = checkpoint["model_state_dict"]
-        epoch = checkpoint["epoch"]
-        optimizer_state_dict = checkpoint["optimizer_state_dict"]
-        scheduler_state_dict = checkpoint["scheduler_state_dict"]
-
-        # Create the trainer
-        trainer = cls(train_hypers)
-        trainer.optimizer_state_dict = optimizer_state_dict
-        trainer.scheduler_state_dict = scheduler_state_dict
-        trainer.epoch = epoch
-
-        # Create the model
-        model = AlchemicalModel(**model_hypers)
-        model.load_state_dict(model_state_dict)
-
-        return trainer
diff --git a/src/metatrain/experimental/alchemical_model/utils/__init__.py b/src/metatrain/experimental/alchemical_model/utils/__init__.py
deleted file mode 100644
index a1c166538..000000000
--- a/src/metatrain/experimental/alchemical_model/utils/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from .systems_to_torch_alchemical_batch import systems_to_torch_alchemical_batch
-
-__all__ = [
-    "systems_to_torch_alchemical_batch",
-]
diff --git a/src/metatrain/experimental/alchemical_model/utils/composition.py b/src/metatrain/experimental/alchemical_model/utils/composition.py
deleted file mode 100644
index 879672135..000000000
--- a/src/metatrain/experimental/alchemical_model/utils/composition.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from typing import List, Tuple, Union
-
-import torch
-
-from ....utils.data.dataset import Dataset, get_atomic_types
-
-
-def calculate_composition_weights(
-    datasets: Union[Dataset, List[Dataset]], property: str
-) -> Tuple[torch.Tensor, List[int]]:
-    """Calculate the composition weights for a dataset.
-
-    It assumes per-system properties.
-
-    :param datasets: Dataset or list of datasets to calculate the composition
-        weights for.
-    :param property: Name of the target property to use for the calculation.
-    :returns: Composition weights for the dataset, as well as the
-        list of species that the weights correspond to.
-    """
-    if not isinstance(datasets, list):
-        datasets = [datasets]
-
-    # Note: `atomic_types` are sorted, and the composition weights are sorted as
-    # well, because the species are sorted in the composition features.
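-    # The fit below solves the ridge-regularized normal equations
-    # (X^T X + eps * I) w = X^T y, where X counts the atoms of each type in
-    # every structure and y holds the per-structure targets; eps starts tiny
-    # and is increased tenfold until the solve succeeds.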
-    atomic_types = sorted(get_atomic_types(datasets))
-
-    targets = torch.stack(
-        [sample[property].block().values for dataset in datasets for sample in dataset]
-    )
-    targets = targets.squeeze(dim=(1, 2))  # remove component and property dimensions
-
-    total_num_structures = sum([len(dataset) for dataset in datasets])
-    dtype = datasets[0][0]["system"].positions.dtype
-    composition_features = torch.empty(
-        (total_num_structures, len(atomic_types)), dtype=dtype
-    )
-    structure_index = 0
-    for dataset in datasets:
-        for sample in dataset:
-            structure = sample["system"]
-            for j, s in enumerate(atomic_types):
-                composition_features[structure_index, j] = torch.sum(
-                    structure.types == s
-                )
-            structure_index += 1
-
-    regularizer = 1e-20
-    while regularizer:
-        if regularizer > 1e5:
-            raise RuntimeError(
-                "Failed to solve the linear system to calculate the "
-                "composition weights. The dataset is probably too small "
-                "or ill-conditioned."
-            )
-        try:
-            solution = torch.linalg.solve(
-                composition_features.T @ composition_features
-                + regularizer
-                * torch.eye(
-                    composition_features.shape[1],
-                    dtype=composition_features.dtype,
-                    device=composition_features.device,
-                ),
-                composition_features.T @ targets,
-            )
-            break
-        except torch._C._LinAlgError:
-            regularizer *= 10.0
-
-    return solution, atomic_types
diff --git a/src/metatrain/experimental/alchemical_model/utils/normalize.py b/src/metatrain/experimental/alchemical_model/utils/normalize.py
deleted file mode 100644
index 494aa0388..000000000
--- a/src/metatrain/experimental/alchemical_model/utils/normalize.py
+++ /dev/null
@@ -1,97 +0,0 @@
-from typing import List, Union
-
-import metatensor.torch
-import torch
-
-from metatrain.utils.data import Dataset
-
-
-def get_average_number_of_atoms(
-    datasets: List[Union[Dataset, torch.utils.data.Subset]]
-):
-    """Calculates the average number of atoms in a dataset.
-
-    :param datasets: A list of datasets.
-
-    :return: A `torch.Tensor` object with the average number of atoms.
-    """
-    average_number_of_atoms = []
-    for dataset in datasets:
-        dtype = dataset[0].system.positions.dtype
-        num_atoms = []
-        for i in range(len(dataset)):
-            system = dataset[i].system
-            num_atoms.append(len(system))
-        average_number_of_atoms.append(torch.mean(torch.tensor(num_atoms, dtype=dtype)))
-    return torch.tensor(average_number_of_atoms)
-
-
-def get_average_number_of_neighbors(
-    datasets: List[Union[Dataset, torch.utils.data.Subset]]
-) -> torch.Tensor:
-    """Calculate the average number of neighbors in a dataset.
-
-    :param datasets: A list of datasets.
-
-    :return: A `torch.Tensor` object with the average number of neighbors.
-    """
-    average_number_of_neighbors = []
-    for dataset in datasets:
-        num_neighbor = []
-        dtype = dataset[0].system.positions.dtype
-        for i in range(len(dataset)):
-            system = dataset[i].system
-            known_neighbor_lists = system.known_neighbor_lists()
-            if len(known_neighbor_lists) == 0:
-                raise ValueError(f"system {system} does not have a neighbor list")
-            elif len(known_neighbor_lists) > 1:
-                raise ValueError(
-                    "More than one neighbor list per system is not yet supported"
-                )
-            nl = system.get_neighbor_list(known_neighbor_lists[0])
-            num_neighbor.append(
-                torch.mean(
-                    torch.unique(nl.samples["first_atom"], return_counts=True)[1].to(
                        dtype
-                    )
-                )
-            )
-        average_number_of_neighbors.append(torch.mean(torch.tensor(num_neighbor)))
-    return torch.tensor(average_number_of_neighbors)
-
-
-def remove_composition_from_dataset(
-    dataset: Union[Dataset, torch.utils.data.Subset],
-    atomic_types: List[int],
-    composition_weights: torch.Tensor,
-) -> Union[Dataset, torch.utils.data.Subset]:
-    """Remove the composition from the dataset.
-
-    :param dataset: The dataset to remove the composition contribution from.
-    :param atomic_types: The atomic types present in the dataset.
-    :param composition_weights: The composition weights to remove.
-
-    :return: The dataset with the composition contribution removed.
-    """
-    # assert one property
-    first_sample = next(iter(dataset))
-    assert len(first_sample) == 2  # system and property
-    property_name = list(first_sample.keys())[1]
-
-    new_systems = []
-    new_properties = []
-    # remove composition from dataset
-    for i in range(len(dataset)):
-        system = dataset[i]["system"]
-        property = dataset[i][property_name]
-        numbers = system.types
-        composition = torch.bincount(numbers, minlength=max(atomic_types) + 1)
-        composition = composition[atomic_types].to(
-            device=composition_weights.device, dtype=composition_weights.dtype
-        )
-        property = metatensor.torch.subtract(
-            property, torch.dot(composition, composition_weights).item()
-        )
-        new_systems.append(system)
-        new_properties.append(property)
-
-    return Dataset.from_dict({"system": new_systems, property_name: new_properties})
diff --git a/src/metatrain/experimental/alchemical_model/utils/systems_to_torch_alchemical_batch.py b/src/metatrain/experimental/alchemical_model/utils/systems_to_torch_alchemical_batch.py
deleted file mode 100644
index 20c63481c..000000000
--- a/src/metatrain/experimental/alchemical_model/utils/systems_to_torch_alchemical_batch.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from typing import Dict, List
-
-import torch
-from metatensor.torch.atomistic import NeighborListOptions, System
-
-
-def systems_to_torch_alchemical_batch(
-    systems: List[System], nl_options: NeighborListOptions
-) -> Dict[str, torch.Tensor]:
-    """
-    Convert a list of metatensor.torch.atomistic.Systems to a dictionary of torch
-    tensors compatible with torch_alchemical calculators.
- """ - device = systems[0].positions.device - positions = torch.cat([item.positions for item in systems]) - cells = torch.cat([item.cell for item in systems]) - numbers = torch.cat([item.types for item in systems]) - ptr = torch.tensor([0] + [len(item) for item in systems]).cumsum(0) - batch = torch.repeat_interleave( - torch.arange(len(systems)), torch.tensor([len(item) for item in systems]) - ).to(device) - edge_index_list = [] - edge_offsets_list = [] - for i, system in enumerate(systems): - nl = system.get_neighbor_list(nl_options) - samples = nl.samples - edge_index_item = torch.stack( - (samples.column("first_atom"), samples.column("second_atom")), dim=0 - ) - edge_offsets_item = torch.stack( - ( - samples.column("cell_shift_a"), - samples.column("cell_shift_b"), - samples.column("cell_shift_c"), - ), - dim=0, - ).T - edge_index_list.append(edge_index_item + ptr[i]) - edge_offsets_list.append(edge_offsets_item) - - edge_indices = torch.cat(edge_index_list, dim=1) - edge_offsets = torch.cat(edge_offsets_list, dim=0) - - batch_dict = { - "positions": positions, - "cells": cells, - "numbers": numbers, - "edge_indices": edge_indices, - "edge_offsets": edge_offsets, - "batch": batch, - } - - return batch_dict diff --git a/tests/utils/test_architectures.py b/tests/utils/test_architectures.py index 86d684632..3a8cda642 100644 --- a/tests/utils/test_architectures.py +++ b/tests/utils/test_architectures.py @@ -24,7 +24,6 @@ def test_find_all_architectures(): all_arches = find_all_architectures() assert len(all_arches) == 5 - assert "experimental.alchemical_model" in all_arches assert "experimental.gap" in all_arches assert "experimental.pet" in all_arches assert "experimental.soap_bpnn" in all_arches diff --git a/tox.ini b/tox.ini index 47f9cad11..ff65084ec 100644 --- a/tox.ini +++ b/tox.ini @@ -99,16 +99,6 @@ changedir = src/metatrain/experimental/soap_bpnn/tests/ commands = pytest {posargs} -[testenv:alchemical-model-tests] -description = Run Alchemical Model tests with pytest -passenv = * -deps = - pytest -extras = alchemical-model -changedir = src/metatrain/experimental/alchemical_model/tests/ -commands = - ; pytest {posargs} - [testenv:pet-tests] description = Run PET tests with pytest passenv = *