-
Notifications
You must be signed in to change notification settings - Fork 3
/
make_zarr.py
123 lines (103 loc) · 4.82 KB
/
make_zarr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import rioxarray
from rasterio.crs import CRS
import rasterio
import xarray as xr
import argparse
import numpy as np
import geopandas as gpd
import os
import warnings
warnings.filterwarnings("ignore")
def make_zarr(item, chunking, data_path, store_path):
print('\tProcessing igm file...', flush=True)
# Read the igm file
# NOTE: this path works if original data downloaded is not in folder_name == 'aviris_data'
# If downloaded data is stored in folder_name == 'aviris_data' or similar, change path to
# igm_path = os.path.join(data_path, f"{item}_rdn_igm")
igm_path = f"{item}_rdn_igm"
#igm_path = os.path.join(data_path, f"{item}_rdn_igm")
igm = xr.open_dataset(igm_path, engine='rasterio')
# Define easting, northing, elevation
easting_data = igm.band_data[0].data
northing_data = igm.band_data[1].data
elevation_data = igm.band_data[2].data
# Make new dataset
new_igm = xr.Dataset(
{
'Easting': (('y', 'x'), easting_data),
'Northing': (('y', 'x'), northing_data),
'Elevation': (('y', 'x'), elevation_data),
},
coords = {
'y': ('y', igm.y.data),
'x': ('x', igm.x.data)
}
)
print('\tProcessing rfl file...', flush=True)
# Read the rfl file under L2a/
# NOTE: this path works if original data downloaded is not in folder_name == 'aviris_data'
# If downloaded data is stored in folder_name == 'aviris_data' or similar, change path to
# rfl_path = os.path.join(data_path, f"{item}_rfl")
rfl_path = f"{item}_rfl"
rfl = xr.open_dataset(rfl_path, engine='rasterio')
# Swap to be able to select based on wavelength
rfl = rfl.swap_dims({'band': 'wavelength'})
# Define reflectance
rfl = rfl.rename({'band_data': 'Reflectance'})
# Combine rfl and igm data
merged = rfl.merge(new_igm)
print('\tProcessing rdn file...', flush=True)
# Read the rdn file under L1/
# NOTE: this path works if original data downloaded is not in folder_name == 'aviris_data'
# If downloaded data is stored in folder_name == 'aviris_data' or similar, change path to
# rdn_path = os.path.join(data_path, f"{item}_rdn_v2z4_clip")
rdn_path = f"{item}_rdn_v2z4_clip"
rdn = xr.open_dataset(rdn_path, engine='rasterio')
# Swap to be able to select based on wavelength
rdn = rdn.swap_dims({'band': 'wavelength'})
# Define radiance
rdn = rdn.rename({'band_data': 'Radiance'})
print('\tMerging files...', flush=True)
# Combine all data
merged = merged.merge(rdn)
merged = merged.transpose('x', 'y', 'wavelength')
# Add CRS and define spatial dimensions
merged = merged.rio.write_crs(32610, inplace=True)
merged = merged.rio.set_spatial_dims("x", "y", inplace=True)
print('\tChunk and write Zarr...', flush=True)
# Chunk data
if chunking is not None:
merged = merged.chunk({'x':chunking[0], 'y':chunking[1], 'wavelength':chunking[2]})
# Save to local store
merged.to_zarr(store=store_path, mode='w', consolidated=True)
def setup_opts():
parser = argparse.ArgumentParser()
parser.add_argument('--username', type=str, default='mdunn', help='username')
# If wanting to save zarr archives in separate folder within working directory, modify folder_name and dataset_date
parser.add_argument('--folder_name', type=str, default='', help='data folder')
parser.add_argument('--dataset_date', type=str, default='', help='dataset date')
parser.add_argument('--x_chunk', type=int, default=100, help='chunk size in x, set 0 for no chunking')
parser.add_argument('--y_chunk', type=int, default=100, help='chunk size in y, set 0 for no chunking')
parser.add_argument('--wavelength_chunk', type=int, default=100, help='chunk size in wavelength, set 0 for no chunking')
parser.add_argument('--item', type=str, help='name of flight path')
return parser.parse_args()
def main(opts):
username = opts.username
folder_name = opts.folder_name
dataset_date = opts.dataset_date
data_path = os.path.join(folder_name, dataset_date)
item = opts.item
chunking_strategy = (opts.x_chunk, opts.y_chunk, opts.wavelength_chunk)
chunking = None if chunking_strategy==(0, 0, 0) else chunking_strategy # If using default/no chunking, set to None
print(f'Making zarr for dataset: {dataset_date}, item: {item}...', flush=True)
if chunking is None:
zarr_filepath = f'{item}_default.zarr'
else:
zarr_filepath = f'{item}_{chunking[0]:03d}-{chunking[1]:03d}-{chunking[2]:03d}.zarr'
store_path = f'/discover/nobackup/projects/SBG-DO/{username}/{folder_name}/{dataset_date}/{zarr_filepath}'
print(f'Store path: {store_path}', flush=True)
make_zarr(item, chunking, data_path, store_path)
if __name__ == '__main__':
opts = setup_opts()
main(opts)
print('Done!')