Skip to content

Commit

Permalink
fix(ecmwf-s3): Make it read all from A2 files (#192)
Browse files Browse the repository at this point in the history
  • Loading branch information
devsjc authored Oct 22, 2024
1 parent 612bb6f commit c9047bb
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 24 deletions.
20 changes: 4 additions & 16 deletions src/nwp_consumer/internal/inputs/ecmwf/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,23 +78,10 @@ def listRawFilesForInitTime(self, *, it: dt.datetime) -> list[internal.FileInfoM
"""Overrides the corresponding method in the parent class."""
allFiles: list[str] = self.__fs.ls((self.bucket / self.bucketPath).as_posix())
# List items are of the form "bucket/folder/filename", so extract just the filename

fileprefix: str
match self.area:
case "uk":
fileprefix = "A1D"
case "nw-india":
fileprefix = "A1D"
case "india":
fileprefix = "A2D"
case _:
log.warn(event="Unknown area", area=self.area)
return []

initTimeFiles: list[internal.FileInfoModel] = [
ECMWFLiveFileInfo(fname=pathlib.Path(file).name)
for file in allFiles
if it.strftime(f"{fileprefix}%m%d%H") in file
if it.strftime("A2D%m%d%H") in file
]
return initTimeFiles

Expand Down Expand Up @@ -133,8 +120,9 @@ def mapCachedRaw(self, *, p: pathlib.Path) -> xr.Dataset:
"""Overrides the corresponding method in the parent class."""
all_dss: list[xr.Dataset] = cfgrib.open_datasets(p.as_posix())
area_dss: list[xr.Dataset] = _filterDatasetsByArea(all_dss, self.area)
del all_dss
if len(area_dss) == 0:
log.error(
log.warn(
event="No datasets found for area",
area=self.area,
file=p,
Expand All @@ -143,7 +131,7 @@ def mapCachedRaw(self, *, p: pathlib.Path) -> xr.Dataset:
return xr.Dataset()

ds: xr.Dataset = xr.merge(area_dss, combine_attrs="drop_conflicts")
del area_dss, all_dss
del area_dss

ds = ds.drop_vars(
names=[v for v in ds.coords if v not in COORDINATE_ALLOW_LIST],
Expand Down
16 changes: 8 additions & 8 deletions src/nwp_consumer/internal/inputs/ecmwf/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@ def tearDownClass(cls) -> None:

def test_listFilesForInitTime(self) -> None:
files = [
"A1D01010000010100001",
"A1D01010000010101001",
"A1D01010000010102011",
"A1D01010000010103001",
"A1D01011200010112001", # Different init time
"A1D02191200010112001", # Leap year on 2024-02-29
"A2D01010000010100001",
"A2D01010000010101001",
"A2D01010000010102011",
"A2D01010000010103001",
"A2D01011200010112001", # Different init time
"A2D02191200010112001", # Leap year on 2024-02-29
]
for file in files:
# Create files in the mock bucket
Expand All @@ -97,12 +97,12 @@ def test_downloadRawFile(self) -> None:
# Create a file in the mock bucket
self.testS3.put_object(
Bucket=BUCKET,
Key=(RAW / "A1D01010000010100001").as_posix(),
Key=(RAW / "A2D01010000010100001").as_posix(),
Body=b"test",
)

# Test the downloadRawFile method
out = self.client.downloadToCache(fi=ECMWFLiveFileInfo(fname="A1D01010000010100001"))
out = self.client.downloadToCache(fi=ECMWFLiveFileInfo(fname="A2D01010000010100001"))
self.assertEqual(out.read_bytes(), b"test")

out.unlink()
Expand Down

0 comments on commit c9047bb

Please sign in to comment.