Merge upstream #10

Merged: 28 commits, Mar 7, 2024

Commits
b20c77c  Update readme with link to example data (joeloskarsson, Oct 12, 2023)
9ef74e4  Add MIT license (joeloskarsson, Oct 12, 2023)
89a4c63  Implement multi-GPU training using DDP (joeloskarsson, Oct 13, 2023)
6866989  Update readme with links to ideas for making code area-agnostic (joeloskarsson, Oct 24, 2023)
d24c0a8  Fix bug where some mesh features were set as persistent buffers (joeloskarsson, Nov 1, 2023)
e5f1ad3  Refactor all InteractionNet instances to all use same general class (joeloskarsson, Oct 24, 2023)
2d86715  Add option to train only on control member of ensemble dataset (joeloskarsson, Oct 31, 2023)
6377d44  Fix bug in test rmse computation, causing incorrect values (generally… (joeloskarsson, Nov 6, 2023)
9912ece  Make sure wandb is initialized before defining metrics, also for pyto… (joeloskarsson, Nov 9, 2023)
cd94f57  Change run id format to avoid name collisions (joeloskarsson, Nov 25, 2023)
2378ed7  Update bibtex in readme (joeloskarsson, Dec 13, 2023)
c14b6b4  Introduce metrics module with new loss options (joeloskarsson, Jan 9, 2024)
474bad9  Add pre-commit configuration for linting and formatting (#6) (sadamov, Feb 1, 2024)
1cddf09  Fix github pre-commit action using incomplete python env (#8) (joeloskarsson, Feb 1, 2024)
0669ff4  Re-define RMSE metric to take sqrt after sample averaging (#10) (joeloskarsson, Feb 29, 2024)
4539819  Merge remote-tracking branch 'upstream/main' into merge_upstream (sadamov, Mar 1, 2024)
0793684  fixing format (sadamov, Mar 1, 2024)
96f895f  pre-commit and merge complete (sadamov, Mar 2, 2024)
9d592a2  Fix formating and bugs after merge with upstream/main (sadamov, Mar 6, 2024)
000275a  set preprocess=true for simpler first use (sadamov, Mar 6, 2024)
4bbe43c  Install same linters/formatters as used in pre-commit (sadamov, Mar 6, 2024)
ed305ca  Set default step_len to 1 (sadamov, Mar 6, 2024)
daf95ff  Fixed import order with flake8 as it is not consistent with ruff (sadamov, Mar 6, 2024)
de0437e  Merge remote-tracking branch 'upstream/main' into merge_upstream (sadamov, Mar 6, 2024)
c364ef4  Removed .vscode folder (sadamov, Mar 7, 2024)
f4c6ebe  fix import order (twicki, Mar 7, 2024)
ca80852  fix flake8 issues (twicki, Mar 7, 2024)
be9175e  Merge pull request #11 from twicki/fix_upstream (sadamov, Mar 7, 2024)

Files changed
33 changes: 33 additions & 0 deletions .github/workflows/pre-commit.yml
@@ -0,0 +1,33 @@
name: Run pre-commit job

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  pre-commit-job:
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.9
      - name: Install pre-commit hooks
        run: |
          pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 \
            --index-url https://download.pytorch.org/whl/cpu
          pip install -r requirements.txt
          pip install pyg-lib==0.2.0 torch-scatter==2.1.1 torch-sparse==0.6.17 \
            torch-cluster==1.6.1 torch-geometric==2.3.1 \
            -f https://pytorch-geometric.com/whl/torch-2.0.1+cpu.html
      - name: Run pre-commit hooks
        run: |
          pre-commit run --all-files
1 change: 1 addition & 0 deletions .gitignore
@@ -8,6 +8,7 @@ graphs
sweeps
test_*.sh
lightning_logs
.vscode

### Python ###
# Byte-compiled / optimized / DLL files
51 changes: 51 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,51 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: check-ast
      - id: check-case-conflict
      - id: check-docstring-first
      - id: check-symlinks
      - id: check-toml
      - id: check-yaml
      - id: debug-statements
      - id: end-of-file-fixer
      - id: trailing-whitespace
  - repo: local
    hooks:
      - id: codespell
        name: codespell
        description: Check for spelling errors
        language: system
        entry: codespell
  - repo: local
    hooks:
      - id: black
        name: black
        description: Format Python code
        language: system
        entry: black
        types_or: [python, pyi]
  - repo: local
    hooks:
      - id: isort
        name: isort
        description: Group and sort Python imports
        language: system
        entry: isort
        types_or: [python, pyi, cython]
  - repo: local
    hooks:
      - id: flake8
        name: flake8
        description: Check Python code for correctness, consistency and adherence to best practices
        language: system
        entry: flake8 --max-line-length=80 --ignore=E203,F811,I002,W503
        types: [python]
  - repo: local
    hooks:
      - id: pylint
        name: pylint
        entry: pylint -rn -sn
        language: system
        types: [python]
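
Because every tool in this config is declared with `language: system`, pre-commit does not install the linters itself; they must already be available in the active environment (see the "Install same linters/formatters as used in pre-commit" commit above). A minimal sketch of one way to provide them, assuming a pip-based environment and unpinned versions:

```bash
# Install the tools that the local pre-commit hooks call via `entry`
pip install codespell black isort flake8 pylint pre-commit
```
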
22 changes: 0 additions & 22 deletions .vscode/launch.json

This file was deleted.

3 changes: 0 additions & 3 deletions .vscode/settings.json

This file was deleted.

34 changes: 24 additions & 10 deletions README.md
@@ -1,6 +1,7 @@
<p align="middle">
<img src="figures/neural_lam_header.png" width="700">
</p>

Neural-LAM is a repository of graph-based neural weather prediction models for Limited Area Modeling (LAM).
The code uses [PyTorch](https://pytorch.org/) and [PyTorch Lightning](https://lightning.ai/pytorch-lightning).
Graph Neural Networks are implemented using [PyG](https://pyg.org/) and logging is set up through [Weights & Biases](https://wandb.ai/).
@@ -11,16 +12,18 @@ The repository contains LAM versions of:
* GraphCast, by [Lam et al. (2023)](https://arxiv.org/abs/2212.12794).
* The hierarchical model from [Oskarsson et al. (2023)](https://arxiv.org/abs/2309.17370).

For more information see our preprint: [*Graph-based Neural Weather Prediction for Limited Area Modeling*](https://arxiv.org/abs/2309.17370).
For more information see our paper: [*Graph-based Neural Weather Prediction for Limited Area Modeling*](https://arxiv.org/abs/2309.17370).
If you use Neural-LAM in your work, please cite:
```
@article{oskarsson2023graphbased,
title={Graph-based Neural Weather Prediction for Limited Area Modeling},
author={Joel Oskarsson and Tomas Landelius and Fredrik Lindsten},
year={2023},
journal={arXiv preprint arXiv:2309.17370}
@inproceedings{oskarsson2023graphbased,
title={Graph-based Neural Weather Prediction for Limited Area Modeling},
author={Oskarsson, Joel and Landelius, Tomas and Lindsten, Fredrik},
booktitle={NeurIPS 2023 Workshop on Tackling Climate Change with Machine Learning},
year={2023}
}
```
As the code in the repository is continuously evolving, the latest version might feature some small differences to what was used in the paper.
See the branch [`ccai_paper_2023`](https://github.com/joeloskarsson/neural-lam/tree/ccai_paper_2023) for a revision of the code that reproduces the workshop paper.

We plan to continue updating this repository as we improve existing models and develop new ones.
Collaborations around this implementation are very welcome.
@@ -47,10 +50,11 @@ mamba env create -f environment.yml
mamba activate neural-lam

# Run the preprocessing/training scripts
# (don't execute preprocessing scripts at the same time as training)
sbatch slurm_train.sh

# Run the evaluation script and generate plots and gif for TQV
# (don't execute preprocessing scripts at the same time as training)
# (by default this will use the pre-trained model from `wandb/example.ckpt`)
sbatch slurm_eval.sh

```
@@ -101,9 +105,9 @@ Note that only the cuda version is pinned to 11.8, otherwise all the latest libr
</span>

\
Follow the steps below to create the neccesary python environment.
Follow the steps below to create the necessary python environment.

1. Install GEOS for your system. For example with `sudo apt-get install libgeos-dev`. This is neccesary for the Cartopy requirement.
1. Install GEOS for your system. For example with `sudo apt-get install libgeos-dev`. This is necessary for the Cartopy requirement.
2. Use python 3.9.
3. Install version 2.0.1 of PyTorch. Follow instructions on the [PyTorch webpage](https://pytorch.org/get-started/previous-versions/) for how to set this up with GPU support on your system.
4. Install required packages specified in `requirements.txt`.
@@ -233,7 +237,7 @@ python train_model.py --model hi_lam --graph hierarchical ...
```

### Hi-LAM-Parallel
A version of Hi-LAM where all message passing in the hierarchical mesh (up, down, inter-level) is ran in paralell.
A version of Hi-LAM where all message passing in the hierarchical mesh (up, down, inter-level) is ran in parallel.
Not included in the paper as initial experiments showed worse results than Hi-LAM, but could be interesting to try in more settings.

To train Hi-LAM-Parallel use
@@ -343,6 +347,16 @@ In addition, hierarchical mesh graphs (`L > 1`) feature a few additional files w
These files have the same list format as the ones above, but each list has length `L-1` (as these edges describe connections between levels).
Entries 0 in these lists describe edges between the lowest levels 1 and 2.
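
As a rough illustration of this list-of-edge-sets format (the file name and path below are hypothetical placeholders, not necessarily the exact names used by the repository):

```python
import torch

# Hypothetical example: load one per-level edge index file of a
# hierarchical mesh graph saved with torch.save
up_edge_index = torch.load("graphs/hierarchical/mesh_up_edge_index.pt")

# The list has one entry per pair of adjacent levels, i.e. length L - 1
num_levels = len(up_edge_index) + 1
# Entry 0 describes the edges between the two lowest levels (1 and 2)
edges_levels_1_2 = up_edge_index[0]
print(num_levels, edges_levels_1_2.shape)
```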

# Development and Contributing
Any push or Pull-Request to the main branch will trigger a selection of pre-commit hooks.
These hooks will run a series of checks on the code, like formatting and linting.
If any of these checks fail the push or PR will be rejected.
To test whether your code passes these checks before pushing, run
``` bash
pre-commit run --all-files
```
from the root directory of the repository.
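
To have the same checks run automatically on every `git commit` as well, one possible local setup (a sketch, assuming a pip-based environment) is:

```bash
pip install pre-commit   # if it is not already in your environment
pre-commit install       # registers the git hook in .git/hooks
```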

# Contact
If you are interested in machine learning models for LAM, have questions about our implementation or ideas for extending it, feel free to get in touch.
You can open a github issue on this page, or (if more suitable) send an email to [[email protected]](mailto:[email protected]).
55 changes: 35 additions & 20 deletions create_grid_features.py
@@ -1,47 +1,62 @@
# Standard library
import os
from argparse import ArgumentParser

# Third-party
import numpy as np
import torch


def main():
parser = ArgumentParser(description='Training arguments')
parser.add_argument('--dataset', type=str, default="meps_example",
help='Dataset to compute weights for (default: meps_example)')
"""
Pre-compute all static features related to the grid nodes
"""
parser = ArgumentParser(description="Training arguments")
parser.add_argument(
"--dataset",
type=str,
default="meps_example",
help="Dataset to compute weights for (default: meps_example)",
)
args = parser.parse_args()

static_dir_path = os.path.join("data", args.dataset, "static")

# -- Static grid node features --
grid_xy = torch.tensor(np.load(os.path.join(static_dir_path, "nwp_xy.npy")
)) # (2, N_x, N_y)
grid_xy = torch.tensor(
np.load(os.path.join(static_dir_path, "nwp_xy.npy"))
) # (2, N_x, N_y)
grid_xy = grid_xy.flatten(1, 2).T # (N_grid, 2)
pos_max = torch.max(torch.abs(grid_xy))
grid_xy = grid_xy / pos_max # Divide by maximum coordinate

geopotential = torch.tensor(
np.load(
os.path.join(
static_dir_path,
"reference_geopotential_pressure.npy"))) # (N_x, N_y)
geopotential = geopotential.flatten(0, 1) # (N_grid, N_static)
os.path.join(static_dir_path, "reference_geopotential_pressure.npy")
)
) # (N_x, N_y, N_fields)
geopotential = geopotential.flatten(0, 1) # (N_grid, N_fields)
gp_min = torch.min(geopotential)
gp_max = torch.max(geopotential)
# Rescale geopotential to [0,1]
geopotential = (geopotential - gp_min) / (
gp_max - gp_min
) # (N_grid, N_fields)

grid_border_mask = torch.tensor(
np.load(
os.path.join(
static_dir_path,
"border_mask.npy")),
dtype=torch.int64) # (N_x, N_y)
grid_border_mask = grid_border_mask.flatten(0, 1).to(
torch.float).unsqueeze(1) # (N_grid, 1)
np.load(os.path.join(static_dir_path, "border_mask.npy")),
dtype=torch.int64,
) # (N_x, N_y)
grid_border_mask = (
grid_border_mask.flatten(0, 1).to(torch.float).unsqueeze(1)
) # (N_grid, 1)

# Concatenate grid features
grid_features = torch.cat((grid_xy, geopotential, grid_border_mask),
dim=1) # (N_grid, 3 + N_static)
grid_features = torch.cat(
(grid_xy, geopotential, grid_border_mask), dim=1
) # (N_grid, 4)

torch.save(grid_features, os.path.join(
static_dir_path, "grid_features.pt"))
torch.save(grid_features, os.path.join(static_dir_path, "grid_features.pt"))


if __name__ == "__main__":
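
The reformatted script above concatenates the normalized coordinates, rescaled geopotential and border mask into a single (N_grid, 4) tensor and saves it to `grid_features.pt`. A quick, hypothetical sanity check of that output (the path assumes the default `meps_example` dataset) could look like:

```python
import torch

# Load the static grid features written by create_grid_features.py
grid_features = torch.load("data/meps_example/static/grid_features.pt")

print(grid_features.shape)  # (N_grid, 4): x, y, geopotential, border mask
# Coordinates are divided by the max absolute coordinate, geopotential is
# min-max scaled to [0, 1] and the border mask is 0/1
print(grid_features.min(dim=0).values)
print(grid_features.max(dim=0).values)
```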