Skip to content

Commit

Permalink
add ingest function and unzip
Browse files Browse the repository at this point in the history
  • Loading branch information
Yang committed Oct 16, 2023
1 parent 313a14b commit 1f592aa
Showing 1 changed file with 51 additions and 0 deletions.
51 changes: 51 additions & 0 deletions src/zampy/datasets/land_cover.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from pathlib import Path
import numpy as np
from zipfile import ZipFile
from zampy.datasets import cds_utils
from zampy.datasets import validation
from zampy.datasets.dataset_protocol import SpatialBounds
Expand Down Expand Up @@ -85,3 +86,53 @@ def download(
)

return True

def ingest(
    self,
    download_dir: Path,
    ingest_dir: Path,
    overwrite: bool = False,
) -> bool:
    """Unpack the downloaded archives of this dataset into the ingest directory.

    Archives are looked up in ``download_dir / self.name`` using the pattern
    ``"<self.name>_*.zip"``; each match is passed to ``unzip_raw_to_netcdf``
    together with ``ingest_dir / self.name`` as the destination.

    Args:
        download_dir: Root directory holding one sub-folder per dataset with
            the downloaded files.
        ingest_dir: Root directory for ingested data; a sub-folder named after
            this dataset is created when it does not exist yet.
        overwrite: Forwarded unchanged to ``unzip_raw_to_netcdf``.

    Returns:
        Always True.
    """
    source_dir = download_dir / self.name
    target_dir = ingest_dir / self.name
    target_dir.mkdir(parents=True, exist_ok=True)

    # Collect all matching archives up front, before anything is extracted.
    zip_paths = [*source_dir.glob(f"{self.name}_*.zip")]
    for zip_path in zip_paths:
        unzip_raw_to_netcdf(target_dir, file=zip_path, overwrite=overwrite)

    # Presumably carries the dataset's properties file over to the ingest
    # folder -- the helper is defined outside this section, confirm there.
    copy_properties_file(source_dir, target_dir)

    return True


def unzip_raw_to_netcdf(
    ingest_folder: Path,
    file: Path,
    overwrite: bool = False,
) -> None:
    """Unpack a downloaded land cover zip archive into the ingest folder.

    The archive is skipped, with a message printed to stdout, when a file
    named after the archive (suffix replaced by ``.nc``) is already present in
    ``ingest_folder`` and ``overwrite`` is False.

    Args:
        ingest_folder: Folder where the files have to be written to.
        file: Path to the land cover .zip archive.
        overwrite: Overwrite existing files. If False, an archive whose
            expected output already exists is skipped.
    """
    expected_name = file.with_suffix(".nc").name
    target = ingest_folder.joinpath(expected_name)

    # NOTE(review): this check looks for "<archive stem>.nc", whereas
    # extract_netcdf_to_zampy writes the member under the name stored inside
    # the archive -- confirm the two names coincide for real downloads.
    if target.exists() and not overwrite:
        print(f"File '{target.name}' already exists, skipping...")
        return

    extract_netcdf_to_zampy(file, ingest_folder)


def extract_netcdf_to_zampy(file: Path, ingest_folder: Path) -> None:
    """Extract the first file stored in a zip archive into the ingest folder.

    Only a single member is extracted: the first entry of the archive that is
    not a directory. It is written below ``ingest_folder`` under the name
    (including any sub-path) recorded in the archive.

    Args:
        file: Path to the .zip archive to read.
        ingest_folder: Folder the archive member is extracted into.

    Raises:
        IndexError: If the archive does not contain any file. The exception
            type matches what indexing an empty member list raised before,
            but now names the offending archive.
    """
    with ZipFile(file, "r") as zip_object:
        # Directory entries carry no data; skip them so that an archive which
        # lists its folder first still yields the actual file.
        member = next(
            (info for info in zip_object.infolist() if not info.is_dir()),
            None,
        )
        if member is None:
            msg = f"Archive '{file}' does not contain any file to extract."
            raise IndexError(msg)
        zip_object.extract(member, path=ingest_folder)

0 comments on commit 1f592aa

Please sign in to comment.