Skip to content

Commit

Permalink
feat(sat-etl): Add validate option
Browse files Browse the repository at this point in the history
  • Loading branch information
devsjc committed Dec 24, 2024
1 parent 3cb9b7a commit 532236f
Showing 1 changed file with 34 additions and 0 deletions.
34 changes: 34 additions & 0 deletions containers/sat/download_process_sat.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,8 @@ def process_scans(
if pathlib.Path(zarr_path).exists():
_rewrite_zarr_times(zarr_path.as_posix())

check_data_quality(xr.open_zarr(zarr_path, consolidated=True))

return dstype


Expand Down Expand Up @@ -618,6 +620,32 @@ def _rewrite_zarr_times(output_name: str) -> None:
action="store_true",
default=False,
)
# Opt-in flag: when passed, ``args.validate`` is True and the run step
# performs a data-quality check on the produced zarr stores.
parser.add_argument(
    "--validate",
    action="store_true",
    default=False,
    help="Check the quality of the data",
)


def _calc_null_percentage(data: np.ndarray):
nulls = np.isnan(data)
return nulls.sum() / len(nulls)

def check_data_quality(ds: xr.Dataset) -> None:
    """Log how many images in ``ds`` exceed the 5% null-value threshold.

    The null fraction is computed per image over the ``x_geostationary`` /
    ``y_geostationary`` plane of ``ds.data_vars["data"]``; every remaining
    dimension is looped over. The number of images whose null fraction is
    greater than 0.05 is then logged at INFO level.

    Args:
        ds: Dataset containing a ``data`` variable with ``x_geostationary``
            and ``y_geostationary`` dimensions.
    """
    result = xr.apply_ufunc(
        _calc_null_percentage,
        ds.data_vars["data"],
        input_core_dims=[["x_geostationary", "y_geostationary"]],
        vectorize=True,
        # Datasets opened with ``xr.open_zarr`` are dask-backed, and
        # ``apply_ufunc`` rejects dask arrays unless handling is enabled.
        dask="parallelized",
        output_dtypes=[float],
        # The spatial core dims may be split across several chunks on disk.
        dask_gufunc_kwargs={"allow_rechunk": True},
    )
    num_images = result.size
    if num_images == 0:
        # Nothing to evaluate; avoid dividing by zero in the summary below.
        log.info("No images found to check for null values")
        return

    num_images_failing_nulls_threshold = (result > 0.05).sum().item()
    log.info(
        f"{num_images_failing_nulls_threshold}/{num_images} "
        f"({num_images_failing_nulls_threshold/num_images:.2%}) "
        "of images have greater than 5% null values",
    )

def run(args: argparse.Namespace) -> None:
"""Run the download and processing pipeline."""
Expand Down Expand Up @@ -690,6 +718,12 @@ def run(args: argparse.Namespace) -> None:
)
log.info(f"Completed archive for args: {args}. ({new_average_secs_per_scan} seconds per scan).")

if args.validate:
for t in completed_types:
zarr_path: pathlib.Path = folder.parent / start.strftime(sat_config.zarr_fmtstr[t])
ds = xr.open_zarr(zarr_path, consolidated=True)
check_data_quality(ds)

# Delete raw files, if desired
if args.delete_raw:
log.info(f"Deleting {len(raw_paths)} raw files in {folder.as_posix()}.")
Expand Down

0 comments on commit 532236f

Please sign in to comment.