Skip to content

Commit

Permalink
feat(repositories): Add MetOffice DataHub repository
Browse files Browse the repository at this point in the history
  • Loading branch information
devsjc committed Nov 22, 2024
1 parent f7922a7 commit e3c016f
Show file tree
Hide file tree
Showing 22 changed files with 613 additions and 170 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/branch_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ jobs:
uses: docker/setup-buildx-action@v3

- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
Expand All @@ -178,6 +178,7 @@ jobs:
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/amd64,linux/arm64
platforms: linux/amd64
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache

3 changes: 3 additions & 0 deletions .github/workflows/main_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ concurrency:
jobs:

# Define an autotagger job that creates tags on changes to master
# Use #major #minor in merge commit messages to bump version beyond patch
# See https://github.com/RueLaLa/auto-tagger?tab=readme-ov-file#usage
tag:
runs-on: ubuntu-latest
if: |
Expand All @@ -34,3 +36,4 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_PR_NUMBER: ${{ github.event.number }}

4 changes: 1 addition & 3 deletions .github/workflows/tagged_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,13 @@ env:

jobs:

# Job to create a container
# Job for building container image
# * Builds and pushes an OCI Container image to the registry defined in the environment variables
build-container:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
needs: ["lint-typecheck", "test-unit"]

steps:
# Do a non-shallow clone of the repo to ensure tags are present
Expand All @@ -44,7 +42,7 @@ jobs:
uses: docker/setup-buildx-action@v3

- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ docs/
# Environments
.venv
uv.lock
.env

# mypy
.mypy_cache/
Expand Down
106 changes: 66 additions & 40 deletions Containerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,31 @@
# POTENTIAL FOR SMALLER CONTAINERFILE IF THIS CAN BE GOT WORKING


# # --- Base Python image -----------------------------------------------------------------
# # --- Base Python image ---------------------------------------------------------------
# FROM python:3.12-bookworm AS python-base
#
# --- Builder image creation -------------------------------------------------------------
# FROM python-base AS builder
#
# Setup non-root user
# ARG USER=monty
# RUN groupadd ${USER} && useradd -m ${USER} -g ${USER}
# USER ${USER}
# ENV PATH="/home/${USER}/.local/bin:${PATH}"
#
# WORKDIR /home/${USER}
#
# Don't generate .pyc, enable tracebacks
# ENV LANG=C.UTF-8 \
# LC_ALL=C.UTF-8 \
# PYTHONDONTWRITEBYTECODE=1 \
# PYTHONFAULTHANDLER=1
#
# # COPY --from=ghcr.io/astral-sh/uv:python3.12-bookworm --chown=1000:1000 /usr/local/bin/uv /home/${USER}/.local/bin/uv
# COPY --from=ghcr.io/astral-sh/uv:python3.12-bookworm /usr/local/bin/uv /usr/local/bin/uv
#
# RUN uv --version
#
# # --- Distroless Container creation -----------------------------------------------------
# FROM gcr.io/distroless/cc-debian12 AS python-distroless
#
Expand All @@ -15,79 +37,83 @@
# COPY --from=python-base /etc/ld.so.cache /etc/ld.so.cache
#
# # Add common compiled libraries
# COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libz.so.1 /lib/${CHIPSET_ARCH}/
# COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libffi.so.8 /lib/${CHIPSET_ARCH}/
# COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libbz2.so.1.0 /lib/${CHIPSET_ARCH}/
# COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libm.so.6 /lib/${CHIPSET_ARCH}/
# COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libc.so.6 /lib/${CHIPSET_ARCH}/
# COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libz.so.1 /usr/lib/${CHIPSET_ARCH}/
# COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libffi* /usr/lib/${CHIPSET_ARCH}/
# # COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libbz2.so.1.0 /usr/lib/${CHIPSET_ARCH}/
# # COPY --from=python-base /lib/${CHIPSET_ARCH}/libm.so.6 /lib/${CHIPSET_ARCH}/
# COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libc.so.6 /usr/lib/${CHIPSET_ARCH}/
#
# Create non root user
# ARG USER=monty
# COPY --from=python-base /bin/echo /bin/echo
# COPY --from=python-base /bin/rm /bin/rm
# COPY --from=python-base /bin/sh /bin/sh
#
# RUN echo "${USER}:x:1000:${USER}" >> /etc/group
# RUN echo "${USER}:x:1001:" >> /etc/group
# RUN echo "${USER}:x:1000:1001::/home/${USER}:" >> /etc/passwd
#
# Check python installation works
# RUN python --version
# RUN rm /bin/sh /bin/echo /bin/rm
#
# # Don't generate .pyc, enable tracebacks
# Don't generate .pyc, enable tracebacks
# ENV LANG=C.UTF-8 \
# LC_ALL=C.UTF-8 \
# PYTHONDONTWRITEBYTECODE=1 \
# PYTHONFAULTHANDLER=1
#
# # Check python installation works
# COPY --from=python-base /bin/rm /bin/rm
# COPY --from=python-base /bin/sh /bin/sh
# RUN python --version
# RUN rm /bin/sh /bin/rm
# # --- Build the application -------------------------------------------------------------
# FROM builder AS build-app
#
# # --- Virtualenv builder image ----------------------------------------------------------
# FROM python-base AS build-venv
# COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
# WORKDIR /app
#
# # Install dependencies using system python
# ENV UV_LINK_MODE=copy \
# UV_COMPILE_BYTECODE=1 \
# UV_PYTHON_DOWNLOADS=never \
# UV_PYTHON=python3.12 \
# UV_NO_CACHE=1 \
# CFLAGS="-g0 -Wl,--strip-all" \
# VENV=/.venv
#
# COPY pyproject.toml ./
# CFLAGS="-g0 -Wl,--strip-all"
#
# # Synchronize DEPENDENCIES without the application itself.
# # This layer is cached until uv.lock or pyproject.toml change.
# # This layer is cached until pyproject.toml changes.
# # Delete any unwanted parts of the installed packages to reduce size
# RUN uv venv ${VENV} && \
# echo "Installing dependencies into ${VENV}" && \
# RUN --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
# echo "Installing dependencies" && \
# mkdir src && \
# du -h ${VENV}/lib/python3.12/site-packages && \
# uv sync --no-dev --no-install-project && \
# echo "Copying libpython package into ${VENV}" && \
# cp --remove-destination /usr/local/bin/python3.12 ${VENV}/bin/python && \
# cp /usr/local/lib/libpython3.12.so.1.0 ${VENV}/lib/ && \
# echo "Optimizing site-packages" && \
# rm -r ${VENV}/lib/python3.12/site-packages/**/tests && \
# du -h ${VENV}/lib/python3.12/site-packages | sort -h | tail -n 4
#
# COPY . /src
# RUN uv pip install --no-deps /src && ls /.venv/bin
# uv sync --no-dev --no-install-project && uv run python -m eccodes selfcheck
# # echo "Optimizing site-packages" && \
# # rm -r .venv/.local/lib/python3.12/site-packages/**/tests && \
# # du -h .venv/.local/lib/python3.12/site-packages | sort -h | tail -n 4
#
# COPY . .
#
# RUN python -m eccodes selfcheck
#
# # --- Distroless App image --------------------------------------------------------------
# FROM python-distroless
#
# COPY --from=build-venv /.venv /venv
# COPY --from=build-app /usr/local /usr/local
#
# ENV RAWDIR=/work/raw \
# ZARRDIR=/work/data \
# ECCODES_DEFINITION_PATH=.venv/share/eccodes/definitions
# ZARRDIR=/work/data
#
# ENTRYPOINT ["/venv/bin/nwp-consumer-cli"]
# ENTRYPOINT ["nwp-consumer-cli"]
# VOLUME /work
# STOPSIGNAL SIGINT


# WORKING CONTAINERFILE


FROM quay.io/condaforge/miniforge3:latest AS build-venv

COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

ENV UV_LINK_MODE=copy \
UV_COMPILE_BYTECODE=1 \
UV_PYTHON_DOWNLOADS=never \
UV_LINK_MODE=copy \
UV_PYTHON=python3.12 \
UV_PROJECT_ENVIRONMENT=/venv
COPY pyproject.toml /_lock/
Expand All @@ -97,7 +123,7 @@ COPY pyproject.toml /_lock/
# Delete any unwanted parts of the installed packages to reduce size
RUN apt-get -qq update && apt-get -qq -y install gcc && \
echo "Creating virtualenv at /venv" && \
conda create --quiet --yes -p /venv python=3.12 numcodecs eccodes
conda create --quiet --yes -p /venv python=3.12 eccodes
RUN echo "Installing dependencies into /venv" && \
cd /_lock && \
mkdir src && \
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ name = "nwp-consumer"
dynamic = ["version"] # Set automatically using git: https://setuptools-git-versioning.readthedocs.io/en/stable/
description = "Tool for aggregating raw NWP files into .zarr files"
readme = {file = "README.md", content-type = "text/markdown"}
requires-python = ">=3.12.0"
requires-python = ">=3.12,<3.13"
license = {text = "MIT License"}
authors = [
{ name = "Sol Cotton", email = "[email protected]"}
Expand Down
20 changes: 15 additions & 5 deletions src/nwp_consumer/cmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,31 @@ def parse_env() -> Adaptors:
"""Parse from the environment."""
model_repository_adaptor: type[ports.ModelRepository]
match os.getenv("MODEL_REPOSITORY"):
# Default to NOAA S3 as it is freely accessible
case None | "gfs":
model_repository_adaptor = repositories.NOAAS3ModelRepository
model_repository_adaptor = \
repositories.model_repositories.NOAAS3ModelRepository
case "ceda":
model_repository_adaptor = repositories.CEDAFTPModelRepository
model_repository_adaptor = \
repositories.model_repositories.CEDAFTPModelRepository
case "ecmwf-realtime":
model_repository_adaptor = repositories.ECMWFRealTimeS3ModelRepository
model_repository_adaptor = \
repositories.model_repositories.ECMWFRealTimeS3ModelRepository
case "metoffice-datahub":
model_repository_adaptor = \
repositories.model_repositories.MetOfficeDatahubModelRepository
case _ as model:
log.error(f"Unknown model: {model}")
sys.exit(1)

notification_repository_adaptor: type[ports.NotificationRepository]
match os.getenv("NOTIFICATION_REPOSITORY", "stdout"):
case "stdout":
notification_repository_adaptor = repositories.StdoutNotificationRepository
notification_repository_adaptor = \
repositories.notification_repositories.StdoutNotificationRepository
case "dagster-pipes":
notification_repository_adaptor = repositories.DagsterPipesNotificationRepository
notification_repository_adaptor = \
repositories.notification_repositories.DagsterPipesNotificationRepository
case _ as notification:
log.error(f"Unknown notification repository: {notification}")
sys.exit(1)
Expand All @@ -46,6 +55,7 @@ def parse_env() -> Adaptors:

def run_cli() -> None:
"""Entrypoint for the CLI handler."""
# TODO: InfoUseCase
adaptors = parse_env()
c = handlers.CLIHandler(
consumer_usecase=services.ConsumerService(
Expand Down
35 changes: 32 additions & 3 deletions src/nwp_consumer/internal/entities/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,14 @@
"""

import dataclasses
import logging
from enum import StrEnum, auto

import xarray as xr
from returns.result import Failure, ResultE, Success

log = logging.getLogger("nwp-consumer")


@dataclasses.dataclass(slots=True)
class ParameterLimits:
Expand Down Expand Up @@ -136,7 +140,7 @@ def metadata(self) -> ParameterData:
"incident on the surface expected over the next hour.",
units="W/m^2",
limits=ParameterLimits(upper=1500, lower=0),
alternate_shortnames=["swavr", "ssrd", "dswrf"],
alternate_shortnames=["swavr", "ssrd", "dswrf", "sdswrf"],
)
case self.DOWNWARD_LONGWAVE_RADIATION_FLUX_GL.name:
return ParameterData(
Expand All @@ -146,7 +150,7 @@ def metadata(self) -> ParameterData:
"incident on the surface expected over the next hour.",
units="W/m^2",
limits=ParameterLimits(upper=500, lower=0),
alternate_shortnames=["strd", "dlwrf"],
alternate_shortnames=["strd", "dlwrf", "sdlwrf"],
)
case self.RELATIVE_HUMIDITY_SL.name:
return ParameterData(
Expand All @@ -156,7 +160,7 @@ def metadata(self) -> ParameterData:
"to the equilibrium vapour pressure of water",
units="%",
limits=ParameterLimits(upper=100, lower=0),
alternate_shortnames=["r"],
alternate_shortnames=["r", "r2"],
)
case self.VISIBILITY_SL.name:
return ParameterData(
Expand Down Expand Up @@ -325,3 +329,28 @@ def try_from_alternate(name: str) -> ResultE["Parameter"]:
return Success(p)
return Failure(ValueError(f"Unknown shortname: {name}"))

@staticmethod
def rename_else_drop_ds_vars(
ds: xr.Dataset, allowed_parameters: list["Parameter"],
) -> xr.Dataset:
"""Rename variables to match expected names, dropping invalid ones.
Returns a dataset with all variables in it renamed to a known `entities.Parameter`
name, if a matching parameter exists, and it is an allowed parameter. Otherwise,
the variable is dropped from the dataset.
Args:
ds: The xarray dataset to rename.
allowed_parameters: The list of parameters allowed in the resultant dataset.
"""
for var in ds.data_vars:
param_result = Parameter.try_from_alternate(str(var))
match param_result:
case Success(p):
if p in allowed_parameters:
ds = ds.rename_vars({var: p.value})
continue
log.debug("Dropping invalid parameter '%s' from dataset", var)
ds = ds.drop_vars(str(var))
return ds

9 changes: 9 additions & 0 deletions src/nwp_consumer/internal/entities/repometadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import dataclasses
import datetime as dt
import os

import pandas as pd

Expand Down Expand Up @@ -144,6 +145,14 @@ def month_its(self, year: int, month: int) -> list[dt.datetime]:
its.append(dt.datetime(year, month, day, hour, tzinfo=dt.UTC))
return its

def missing_required_envs(self) -> list[str]:
    """Get a list of unset required environment variables.

    Returns:
        A list of missing environment variables.
    """
    # Collect every required variable name absent from the process environment.
    missing: list[str] = []
    for env_var in self.required_env:
        if env_var not in os.environ:
            missing.append(env_var)
    return missing

def __str__(self) -> str:
"""Return a pretty-printed string representation of the metadata."""
pretty: str = "".join((
Expand Down
Loading

0 comments on commit e3c016f

Please sign in to comment.