Skip to content

Commit

Permalink
preprocess: Support centering netcdf crop window arbitrarily and adde…
Browse files Browse the repository at this point in the history
…d additional documentation to config file and docstrings. Added project to grid function
  • Loading branch information
JuLieAlgebra committed Mar 13, 2024
1 parent ef52f0b commit eb01bfa
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 8 deletions.
12 changes: 10 additions & 2 deletions config/preprocess_netcdf.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
[CropFiles]
# Full relative directory is data/input_dir, data/output_dir
input_dir = "ims_1km"
output_dir = "ims_netcdf_1km_cropped_4_000_000km_window"
window_size = 4000
# Change this output_dir name to reflect the window size and center coordinates selected
output_dir = "ims_netcdf_1km_cropped_4_000_000m_window"

# Final shape will be (window_size*2, window_size*2)
window_size = 4000 #km

# Expect str format in x,y coordinates if provided. Example: "1000, 2000"
# Defaults to center of current grid system if "None"
center_coordinates = "1000, 1500"#None"
55 changes: 49 additions & 6 deletions icedyno/preprocess/crop.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,25 @@
import pathlib

import luigi
import numpy as np
import xarray as xr


class CropFiles(luigi.Task):
"""
Crop IMS and MASIE NetCDF files from the center of their grids (where x, y == 1/2*sie.shape) based on input window_size.
Supports centering the window of the cropped files at different locations.
See config/preprocess_netcdf.toml for configuration settings.
"""

input_dir = luigi.Parameter()
output_dir = luigi.Parameter()

center_coordinates = luigi.Parameter()

window_size = luigi.IntParameter(default=4000)
year = luigi.IntParameter()
year = luigi.IntParameter() # Determined at runtime

def output(self) -> luigi.LocalTarget:
return luigi.LocalTarget(
Expand All @@ -39,19 +44,35 @@ def run(self) -> None:
)

for cdf_filepath in input_cdf_files:
# Set output file name and check if the output file already exists on disk.
output_filename = (
os.path.join(year_output_dir, pathlib.Path(cdf_filepath).stem)
+ f"_grid{self.window_size}.nc"
+ f"_grid{self.window_size}"
)
if self.center_coordinates != "None":
output_filename += (
f"_{self.center_coordinates.replace(' ', '')}center.nc"
)
else:
output_filename += ".nc"

if os.path.exists(output_filename):
print(cdf_filepath, "already on disk, skipping...")

# Open the original NetCDF file
ds = xr.open_dataset(cdf_filepath, engine="h5netcdf")

x_coord = ds["x"].shape[0] // 2
y_coord = ds["y"].shape[0] // 2
window = self.window_size * 1000
if self.center_coordinates != "None":
# Expects self.center_coordinates x,y to be "x, y" float values.
x, y = [float(coord) for coord in self.center_coordinates.split(",")]

# Project x, y to the defined grid and find the index in the grid corresponding to that pair.
x_coord = find_closest_index_in_grid(x, ds.x)
y_coord = find_closest_index_in_grid(y, ds.y)
else:
x_coord = ds["x"].shape[0] // 2
y_coord = ds["y"].shape[0] // 2
window = self.window_size * 1000 # from km to meters

cropped_ds = ds.sel(
x=slice(x_coord - window, x_coord + window),
Expand All @@ -62,6 +83,28 @@ def run(self) -> None:
cropped_ds.to_netcdf(output_filename, engine="h5netcdf")


def find_closest_index_in_grid(target: float, coordinates: np.array) -> int:
"""
Given a target coordinate in projected coordinates (x or y) and the list of coordinates, return the index of the closest number in the coordinates.
Assumes a 1km grid.
"""
assert np.allclose(coordinates % 1000, 500)
start = coordinates[
0
] # Assume that the first element of coordinates is the minimum number in the list
# -12287500.0 for IMS 1km data.

# Define the step size
grid_resolution = 1000 # meters

# Calculate the index of the closest number
index = int((target - start) // grid_resolution)

# If the solution is correct, the target and the found value should never be more than the grid_resolution apart.
assert abs(coordinates[index] - target) < grid_resolution
return index


if __name__ == "__main__":
os.environ["LUIGI_CONFIG_PARSER"] = "toml"

Expand All @@ -74,7 +117,7 @@ def run(self) -> None:

## Change acording to your number of cores
n_workers = 10
years = range(2014, 2025)
years = range(2015, 2025)

tasks = [CropFiles(year=year) for year in years]
luigi.build(tasks, workers=n_workers, local_scheduler=True)

0 comments on commit eb01bfa

Please sign in to comment.