From 478e06c02fe6f5986925982265ede60293fccf41 Mon Sep 17 00:00:00 2001
From: Rahul Maurya
Date: Thu, 4 Apr 2024 17:30:33 +0530
Subject: [PATCH] ruff fixed (docstring remaining)

---
 .ruff.toml                       | 20 +++++++++----------
 graph_weather/data/const.py      |  3 ++-
 graph_weather/data/dataloader.py | 17 +++++++++++-----
 graph_weather/models/analysis.py | 34 ++++++++++++++++----------------
 graph_weather/models/forecast.py |  3 ++-
 graph_weather/models/losses.py   |  2 ++
 train/deepspeed_graph.py         |  6 +-----
 train/run.py                     |  8 +++++---
 train/run_fulll.py               |  8 ++++----
 9 files changed, 55 insertions(+), 46 deletions(-)

diff --git a/.ruff.toml b/.ruff.toml
index e675595d..e66753c0 100644
--- a/.ruff.toml
+++ b/.ruff.toml
@@ -1,10 +1,10 @@
 # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
-select = ["E", "F", "D", "I"]
-ignore = ["D200","D202","D210","D212","D415","D105",]
+lint.select = ["E", "F", "D", "I"]
+lint.ignore = ["D200","D202","D210","D212","D415","D105","D101","D107","D103","D102","D100"]
 
 # Allow autofix for all enabled rules (when `--fix`) is provided.
-fixable = ["A", "B", "C", "D", "E", "F", "I"]
-unfixable = []
+lint.fixable = ["A", "B", "C", "D", "E", "F", "I"]
+lint.unfixable = []
 
 # Exclude a variety of commonly ignored directories.
 exclude = [
@@ -35,22 +35,22 @@ exclude = [
 line-length = 100
 
 # Allow unused variables when underscore-prefixed.
-dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
 
 # Assume Python 3.10.
 target-version = "py311"
-fix = false
+fix=false
 
 # Group violations by containing file.
 output-format = "github"
-ignore-init-module-imports = true
+lint.ignore-init-module-imports = true
 
-[mccabe]
+[lint.mccabe]
 # Unlike Flake8, default to a complexity level of 10.
 max-complexity = 10
 
-[pydocstyle]
+[lint.pydocstyle]
 # Use Google-style docstrings.
 convention = "google"
-[per-file-ignores]
+[lint.per-file-ignores]
 "__init__.py" = ["F401", "E402"]
diff --git a/graph_weather/data/const.py b/graph_weather/data/const.py
index e1d57b3d..e064cb38 100644
--- a/graph_weather/data/const.py
+++ b/graph_weather/data/const.py
@@ -6,7 +6,8 @@
 2. GFS Forecast Fields
 3. ERA5 Reanalysis Fields
 
-where the variance is the variance in the 3 hour change for a variable averaged across all lat/lon and pressure levels
+where the variance is the variance in the 3 hour change for a variable averaged across all lat/lon
+ and pressure levels
 and time for (~100 random temporal frames, more the better)
 
 Min/Max/Mean/Stddev for all those plus each type of observation in observation files
diff --git a/graph_weather/data/dataloader.py b/graph_weather/data/dataloader.py
index 19b7d7a8..e30e0e67 100644
--- a/graph_weather/data/dataloader.py
+++ b/graph_weather/data/dataloader.py
@@ -2,8 +2,10 @@
 
 The dataloader has to do a few things for the model to work correctly
 
-1. Load the land-0sea mask, orography dataset, regridded from 0.1 to the correct resolution
-2. Calculate the top-of-atmosphere solar radiation for each location at fcurrent time and 10 other
+1. Load the land-0sea mask, orography dataset, regridded from 0.1 to the
+correct resolution
+2. Calculate the top-of-atmosphere solar radiation for each location at
+fcurrent time and 10 other
 times +- 12 hours
 3. Add day-of-year, sin(lat), cos(lat), sin(lon), cos(lon) as well
 3. Batch data as either in geometric batches, or more normally
@@ -124,7 +126,8 @@ def __getitem__(self, item):
             ],
             axis=-1,
         )
-        # Not want to predict non-physics variables -> Output only the data variables? Would be simpler, and just add in the new ones each time
+        # Not want to predict non-physics variables -> Output only the data variables?
+        # Would be simpler, and just add in the new ones each time
 
         output_data = np.stack(
             [
@@ -154,9 +157,13 @@ def __getitem__(self, item):
 obs_data = xr.open_zarr(
     "/home/jacob/Development/prepbufr.gdas.20160101.t00z.nr.48h.raw.zarr", consolidated=True
 )
-# TODO Embedding? These should stay consistent across all of the inputs, so can just load the values, not the strings?
-# Should only take in the quality markers, observations, reported observation time relative to start point
+
+# TODO Embedding? These should stay consistent across all of the inputs, so can just load the values
+# not the strings?
+# Should only take in the quality markers, observations, reported observation time relative to start
+# point
 # Observation errors, and background values, lat/lon/height/speed of observing thing
+
 print(obs_data)
 print(obs_data.hdr_inst_typ.values)
 print(obs_data.hdr_irpt_typ.values)
diff --git a/graph_weather/models/analysis.py b/graph_weather/models/analysis.py
index d44abc11..fb707e3a 100644
--- a/graph_weather/models/analysis.py
+++ b/graph_weather/models/analysis.py
@@ -32,23 +32,23 @@ def __init__(
 
         Args:
             observation_lat_lons: Lat/lon points of the observations
-                output_lat_lons: List of latitude and longitudes for the output analysis
-                resolution: Resolution of the H3 grid, prefer even resolutions, as
-                    odd ones have octogons and heptagons as well
-                observation_dim: Input feature size
-                analysis_dim: Output Analysis feature dim
-                node_dim: Node hidden dimension
-                edge_dim: Edge hidden dimension
-                num_blocks: Number of message passing blocks in the Processor
-                hidden_dim_processor_node: Hidden dimension of the node processors
-                hidden_dim_processor_edge: Hidden dimension of the edge processors
-                hidden_layers_processor_node: Number of hidden layers in the node processors
-                hidden_layers_processor_edge: Number of hidden layers in the edge processors
-                hidden_dim_decoder:Number of hidden dimensions in the decoder
-                hidden_layers_decoder: Number of layers in the decoder
-                norm_type: Type of norm for the MLPs
-                    one of 'LayerNorm', 'GraphNorm', 'InstanceNorm', 'BatchNorm', 'MessageNorm', or None
-                use_checkpointing: Whether to use gradient checkpointing or not
+            output_lat_lons: List of latitude and longitudes for the output analysis
+            resolution: Resolution of the H3 grid, prefer even resolutions, as
+                odd ones have octogons and heptagons as well
+            observation_dim: Input feature size
+            analysis_dim: Output Analysis feature dim
+            node_dim: Node hidden dimension
+            edge_dim: Edge hidden dimension
+            num_blocks: Number of message passing blocks in the Processor
+            hidden_dim_processor_node: Hidden dimension of the node processors
+            hidden_dim_processor_edge: Hidden dimension of the edge processors
+            hidden_layers_processor_node: Number of hidden layers in the node processors
+            hidden_layers_processor_edge: Number of hidden layers in the edge processors
+            hidden_dim_decoder:Number of hidden dimensions in the decoder
+            hidden_layers_decoder: Number of layers in the decoder
+            norm_type: Type of norm for the MLPs
+                one of 'LayerNorm', 'GraphNorm', 'InstanceNorm', 'BatchNorm', 'MessageNorm', or None
+            use_checkpointing: Whether to use gradient checkpointing or not
         """
         super().__init__()
diff --git a/graph_weather/models/forecast.py b/graph_weather/models/forecast.py
index 267bc950..6ba6d523 100644
--- a/graph_weather/models/forecast.py
+++ b/graph_weather/models/forecast.py
@@ -39,7 +39,8 @@ def __init__(
                 odd ones have octogons and heptagons as well
             feature_dim: Input feature size
             aux_dim: Number of non-NWP features (i.e. landsea mask, lat/lon, etc)
-            output_dim: Optional, output feature size, useful if want only subset of variables in output
+            output_dim: Optional, output feature size, useful if want only subset of variables in
+                output
             node_dim: Node hidden dimension
             edge_dim: Edge hidden dimension
             num_blocks: Number of message passing blocks in the Processor
diff --git a/graph_weather/models/losses.py b/graph_weather/models/losses.py
index c0d8035d..8ef9af19 100644
--- a/graph_weather/models/losses.py
+++ b/graph_weather/models/losses.py
@@ -25,6 +25,8 @@ def __init__(
         Args:
             feature_variance: Variance for each of the physical features
             lat_lons: List of lat/lon pairs, used to generate weighting
+            device: Device to compute the loss on (CPU or GPU)
+            normalize: Whether to normalize the loss
         """
         # TODO Rescale by nominal static air density at each pressure level
         super().__init__()
diff --git a/train/deepspeed_graph.py b/train/deepspeed_graph.py
index 23ede888..8a191949 100644
--- a/train/deepspeed_graph.py
+++ b/train/deepspeed_graph.py
@@ -1,6 +1,7 @@
 import pytorch_lightning as pl
 import torch
 from pytorch_lightning import Trainer
+from torch.utils.data import DataLoader, Dataset
 
 from graph_weather import GraphWeatherForecaster
 
@@ -32,11 +33,6 @@ def configure_optimizers(self):
     def forward(self, x):
         return self.model(x)
 
-
-# Fake data
-from torch.utils.data import DataLoader, Dataset
-
-
 class FakeDataset(Dataset):
     def __init__(self):
         super(FakeDataset, self).__init__()
diff --git a/train/run.py b/train/run.py
index 91eb403b..7a60227e 100644
--- a/train/run.py
+++ b/train/run.py
@@ -1,5 +1,7 @@
 """Training script for training the weather forecasting model"""
 
+import time
+
 import datasets
 import numpy as np
 import pandas as pd
@@ -22,7 +24,7 @@ def worker_init_fn(worker_id):
     np.random.seed(np.random.get_state()[1][0] + worker_id)
 
 
-def get_mean_stds():
+def get_mean_stds():  # noqa: D103
     names = [
         "CLMR",
         "GRLE",
@@ -333,7 +335,7 @@ def __iter__(self):
             seed=np.random.randint(low=-1000, high=10000), buffer_size=4
         )
         for data in iter(self.dataset):
-            # TODO Currently leaves out lat/lon/Sun irradience, and land/sea mask and topographic data
+            #TODO Currently leaves out lat/lon/Sun irradience, and land/sea mask and topographic data
             data.update(
                 {
                     key: np.expand_dims(np.asarray(value), axis=-1)
@@ -468,7 +470,7 @@ def __iter__(self):
 ).to(device)
 optimizer = optim.AdamW(model.parameters(), lr=0.001)
 print("Done Setup")
-import time
+
 for epoch in range(100):  # loop over the dataset multiple times
     running_loss = 0.0
diff --git a/train/run_fulll.py b/train/run_fulll.py
index 0acac1a3..089a2d4b 100644
--- a/train/run_fulll.py
+++ b/train/run_fulll.py
@@ -3,9 +3,7 @@
 import json
 import os
 import sys
-
-BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-sys.path.append(BASE_DIR)
+import time
 
 import numpy as np
 import torch
@@ -18,6 +16,8 @@
 from graph_weather.data import const
 from graph_weather.models.losses import NormalizedMSELoss
 
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(BASE_DIR)
 
 class XrDataset(Dataset):
     def __init__(self):
@@ -110,7 +110,7 @@ def __getitem__(self, item):
 model = GraphWeatherForecaster(lat_lons, feature_dim=597, num_blocks=6).to(device)
 optimizer = optim.AdamW(model.parameters(), lr=0.001)
 print("Done Setup")
-import time
+
 for epoch in range(100):  # loop over the dataset multiple times
     running_loss = 0.0
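
Note on the .ruff.toml hunk above: the dotted lint.* keys it introduces and a
[lint] table are equivalent TOML spellings, and Ruff (0.2+, where the lint
settings moved under the lint namespace) reads them identically. A minimal
sketch of the same configuration in table form, with the values copied from
the patch and only the section layout assumed:

    # Top-level settings stay at the root of ruff.toml.
    line-length = 100
    target-version = "py311"
    fix = false
    output-format = "github"

    # All linter settings grouped under one [lint] table instead of lint.* keys.
    [lint]
    select = ["E", "F", "D", "I"]
    ignore = ["D200", "D202", "D210", "D212", "D415", "D105", "D101", "D107", "D103", "D102", "D100"]
    fixable = ["A", "B", "C", "D", "E", "F", "I"]
    unfixable = []
    dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
    ignore-init-module-imports = true

    [lint.mccabe]
    max-complexity = 10

    [lint.pydocstyle]
    convention = "google"

    [lint.per-file-ignores]
    "__init__.py" = ["F401", "E402"]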