diff --git a/.github/workflows/branch_ci.yml b/.github/workflows/branch_ci.yml
new file mode 100644
index 00000000..1cea47cc
--- /dev/null
+++ b/.github/workflows/branch_ci.yml
@@ -0,0 +1,184 @@
+# Workflow that runs on pushes to non-default branches
+
+name: Non-Default Branch Push CI (Python)
+
+on:
+  push:
+    branches-ignore: ['main']
+    paths-ignore: ['README.md']
+
+# Specify concurrency such that only one workflow can run at a time
+# * Different workflow files are not affected
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+# Registry for storing Container images
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+# Token scopes: setting any scope resets the unlisted ones to none, so list both
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+
+  # Job to run a linter and typechecker against the codebase
+  lint-typecheck:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+          cache-dependency-glob: "pyproject.toml"
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: "pyproject.toml"
+
+      - name: Install editable package and required dependencies
+        run: uv sync
+
+      - name: Lint package
+        run: uv run ruff check --output-format=github .
+
+      - name: Typecheck package
+        run: uv run mypy .
+        # TODO: GitHub output when https://github.com/python/mypy/pull/17771 merged
+
+  # Job to run unittests
+  # * Produces a JUnit XML report that can be displayed in the GitHub UI
+  test-unit:
+    runs-on: ubuntu-latest
+    needs: lint-typecheck
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+          cache-dependency-glob: "pyproject.toml"
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: "pyproject.toml"
+
+      - name: Install editable package and required dependencies
+        run: uv sync
+
+      # Run unittests
+      # * Produce JUnit XML report
+      - name: Run unit tests
+        run: uv run python -m xmlrunner discover -s src/nwp_consumer -p "test_*.py" --output-file ut-report.xml
+
+      # Create test summary to be visualised on the job summary screen on GitHub
+      # * Runs even if previous steps fail
+      - name: Create test summary
+        uses: test-summary/action@v2
+        with:
+          paths: "*t-report.xml"
+          show: "fail, skip"
+        if: always()
+
+  # Define a job that builds the documentation
+  # * Surfaces the documentation as an artifact
+  build-docs:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+          cache-dependency-glob: "pyproject.toml"
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: "pyproject.toml"
+
+      - name: Install editable package and required dependencies
+        run: uv sync
+
+      # Pydoctor is run to find any linking errors in the docs
+      - name: Build documentation
+        run: |
+          uv run pydoctor --html-output=tmpdocs -W -q
+          PDOC_ALLOW_EXEC=1 uv run pdoc -o docs \
+            --docformat=google \
+            --logo="https://cdn.prod.website-files.com/62d92550f6774db58d441cca/6324a2038936ecda71599a8b_OCF_Logo_black_trans.png" \
+            src/nwp_consumer
+
+      - name: Upload documentation
+        uses: actions/upload-artifact@v4
+        with:
+          name: docs
+          path: docs
+
+  # Job for building container image
+  # * Builds and pushes an OCI Container image to the registry defined in the environment variables
+  # * Only runs if test and lint jobs pass
+  build-container:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    needs: ["lint-typecheck", "test-unit"]
+
+    steps:
+      # Do a non-shallow clone of the repo to ensure tags are present
+      # * This allows setuptools-git-versioning to automatically set the version
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Tag the built image according to the event type
+      # The event is a branch commit, so use the branch name (type=ref,event=branch)
+      - name: Extract metadata (tags, labels) for Container
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: type=ref,event=branch
+
+      # Build and push the Container image to the registry
+      # * Builds for linux/amd64 only
+      # * Pulls build cache from the registry and pushes the updated cache back
+      - name: Build and push container image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: Containerfile
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          platforms: linux/amd64
+          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache
+          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache
+
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 50b219c4..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,294 +0,0 @@ -name: Python CI - -on: - push: - branches: [] - paths-ignore: - - 'README.md' - tags: - - 'v*' - pull_request: - branches: [] - paths-ignore: - - 'README.md' - workflow_dispatch: - -# Specify concurrency such that only one workflow can run at a time -# * Different workflow files are not affected -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -# Registry for storing Container images -env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} - -# Ensure the GitHub token can remove packages -permissions: - packages: write - - -jobs: - - # Define a dependencies job that runs on all branches and PRs - # * Installs dependencies and caches them - build-venv: - runs-on: ubuntu-latest - container: quay.io/condaforge/miniforge3:latest - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - # Restore cached virtualenv, if available - # * The pyproject.toml hash is part of the cache key, invalidating - # the cache if the file changes - - name: Restore cached virtualenv - id: restore-cache - uses: actions/cache/restore@v3 - with: - path: ./venv - key: ${{ runner.os }}-venv-${{ hashFiles('**/pyproject.toml') }} - - # Should mirror the build-venv stage in the Containerfile - - name: Build venv - run: | - apt -qq update && apt -qq install -y build-essential - conda create -p ./venv python=3.12 - ./venv/bin/python -m pip install --upgrade -q pip wheel setuptools - if: steps.restore-cache.outputs.cache-hit != 'true' - - # Should
mirror the build-reqs stage in the Containerfile - # * Except this installs the dev dependencies as well - - name: Install all dependencies - run: | - conda install -p ./venv -q -y eccodes zarr - ./venv/bin/python -m pip install -q .[dev] --no-binary=nwp-consumer - if: steps.restore-cache.outputs.cache-hit != 'true' - - # Cache the virtualenv for future runs - - name: Cache virtualenv - uses: actions/cache/save@v3 - with: - path: ./venv - key: ${{ steps.restore-cache.outputs.cache-primary-key }} - if: steps.restore-cache.outputs.cache-hit != 'true' - - # Define a unittest job that runs on all branches and PRs - test-unit: - runs-on: ubuntu-latest - container: quay.io/condaforge/miniforge3:latest - needs: build-venv - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - # Restore cached virtualenv - - name: Restore cached virtualenv - uses: actions/cache/restore@v3 - with: - path: ./venv - key: ${{ runner.os }}-venv-${{ hashFiles('**/pyproject.toml') }} - - - name: Install package - run: ./venv/bin/python -m pip install -q . 
- - # Run unittests - # * Produce JUnit XML report - - name: Run unit tests - env: - ECCODES_DEFINITION_PATH: ${{ github.workspace }}/venv/share/eccodes/definitions - run: ./venv/bin/python -m xmlrunner discover -s src/nwp_consumer -p "test_*.py" --output-file ut-report.xml - - # Create test summary to be visualised on the job summary screen on GitHub - # * Runs even if previous steps fail - - name: Create test summary - uses: test-summary/action@v2 - with: - paths: "*t-report.xml" - show: "fail, skip" - if: always() - - # Define an autotagger job that runs on merge requests - tag: - runs-on: ubuntu-latest - needs: test-unit - if: | - github.event_name == 'pull_request' && - github.event.action == 'closed' && - github.event.pull_request.merged == true - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - name: Bump version and push tag - uses: RueLaLa/auto-tagger@master - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PR_NUMBER: ${{ github.event.number }} - - - # Define an integration test job that runs only on version on main tags - test-integration: - runs-on: ubuntu-latest - container: quay.io/condaforge/miniforge3:latest - needs: build-venv - if: | - github.event_name == 'workflow_dispatch' || - (contains(github.ref, 'refs/tags/v') && github.event_name == 'push') - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - # Restore cached virtualenv - - name: Restore cached virtualenv - uses: actions/cache/restore@v3 - with: - path: ./venv - key: ${{ runner.os }}-venv-${{ hashFiles('**/pyproject.toml') }} - - - name: Install package - run: ./venv/bin/python -m pip install -q . 
- - # Run integration tests - # * Requires secrets to be set in the repository settings - # * Produce JUnit XML report - - name: Run integration tests - env: - LOGLEVEL: "debug" - RAW_DIR: "/tmp/raw" - ZARR_DIR: "/tmp/zarr" - CEDA_FTP_PASS: ${{ secrets.CEDA_FTP_PASS }} - CEDA_FTP_USER: ${{ secrets.CEDA_FTP_USER }} - METOFFICE_API_KEY: ${{ secrets.METOFFICE_API_KEY }} - METOFFICE_ORDER_ID: ${{ secrets.METOFFICE_ORDER_ID }} - ECMWF_API_KEY: ${{ secrets.ECMWF_API_KEY }} - ECMWF_API_EMAIL: ${{ secrets.ECMWF_API_EMAIL }} - ECMWF_API_URL: ${{ secrets.ECMWF_API_URL }} - run: ./venv/bin/python -m xmlrunner discover -s src/test_integration -p "test_*.py" --output-file it-report.xml - - # Create test summary to be visualised on the job summary screen on GitHub - # * Runs even if previous steps fail - - name: Create test summary - uses: test-summary/action@v2 - with: - paths: "*t-report.xml" - show: "fail, skip" - if: always() - - # Define a "build-container" job that runs on branch commits only - # * Builds and pushes an OCI Container image to the registry defined in the environment variables - # * Only runs if test job passes - build-container: - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - needs: test-unit - if: github.event_name != 'pull_request' - - steps: - # Do a non-shallow clone of the repo to ensure tags are present - # * This allows setuptools-git-versioning to automatically set the version - - name: Checkout repository - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Set up Buildx - uses: docker/setup-buildx-action@v2 - - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - # Tag the built image according to the event type - # * If the event is a valid version tag, use the tag name - # * If the event is a branch commit, use the commit sha - - name: 
Extract metadata (tags, labels) for Container - id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - tags: | - type=ref,event=branch - type=semver,pattern={{version}} - - # Build and push the Container image to the registry - # * Creates a multiplatform-aware image - # * Semantic versioning is handled via the metadata action - # * The image layers are cached between action runs with the following strategy - # * - On push to main, also push build cache - # * - On push to other branches, only pull build cache - - name: Build and push Container image and cache - uses: docker/build-push-action@v4 - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - with: - context: . - file: Containerfile - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - platforms: linux/amd64,linux/arm64 - cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache - cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache,mode=max - - name: Build and push container image - uses: docker/build-push-action@v4 - if: github.event_name != 'push' || github.ref != 'refs/heads/main' - with: - context: . 
- file: Containerfile - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - platforms: linux/amd64,linux/arm64 - cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache - - # Define a "build-wheel" job that runs on version tags - # * Only runs if integration test job passes - build-wheel: - runs-on: ubuntu-latest - needs: test-integration - if: contains(github.ref, 'refs/tags/v') && github.event_name == 'push' - - steps: - # Do a non-shallow clone of the repo to ensure tags are present - # * This allows setuptools-git-versioning to automatically set the version - - name: Checkout repository - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - # Restore cached virtualenv - - name: Restore cached virtualenv - uses: actions/cache/restore@v3 - with: - path: ./venv - key: ${{ runner.os }}-venv-${{ hashFiles('**/pyproject.toml') }} - - # Building the wheel dynamically assigns the version according to git - # * The setuptools_git_versioning package reads the git tags and assigns the version - # * The version is then used in the wheel filename and made available in the package - # * setuptools_git_versioning is configured in pyproject.toml - - name: Build wheel - run: ./venv/bin/python -m pip wheel . 
--no-deps --wheel-dir dist - - - name: Upload wheel - uses: actions/upload-artifact@v3 - with: - name: wheel - path: dist/*.whl - - - name: Publish wheel - uses: pypa/gh-action-pypi-publish@v1.8.10 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.github/workflows/main_ci.yml b/.github/workflows/main_ci.yml new file mode 100644 index 00000000..3ab9fe2a --- /dev/null +++ b/.github/workflows/main_ci.yml @@ -0,0 +1,39 @@ +# Workflow that runs on closed PRs to the default branch + +name: Default Branch PR Merged CI (Python) + +on: + pull_request: + types: ["closed"] + branches: ["main"] + +# Specify concurrency such that only one workflow can run at a time +# * Different workflow files are not affected +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: false + + +jobs: + + # Define an autotagger job that creates tags on changes to master + # Use #major #minor in merge commit messages to bump version beyond patch + # See https://github.com/RueLaLa/auto-tagger?tab=readme-ov-file#usage + tag: + runs-on: ubuntu-latest + if: | + github.event_name == 'pull_request' && + github.event.action == 'closed' && + github.event.pull_request.merged == true + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Bump version and push tag + uses: RueLaLa/auto-tagger@v2 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_NUMBER: ${{ github.event.number }} + diff --git a/.github/workflows/tagged_ci.yml b/.github/workflows/tagged_ci.yml new file mode 100644 index 00000000..ac0953cf --- /dev/null +++ b/.github/workflows/tagged_ci.yml @@ -0,0 +1,119 @@ +# Workflow that runs on new SemVer tags on the default branch + +name: Default Branch SemVer Tagged CI (Python) + +on: + push: + branches: ['main'] + tags: ['v[0-9]+.[0-9]+.[0-9]+'] + paths-ignore: ['README.md'] + +# Specify concurrency such that only one workflow can run at a time +# * Different workflow files are not 
affected +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: false + +# Registry for storing Container images +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + + +jobs: + + # Job for building container image + # * Builds and pushes an OCI Container image to the registry defined in the environment variables + build-container: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + # Do a non-shallow clone of the repo to ensure tags are present + # * This allows setuptools-git-versioning to automatically set the version + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Tag the built image according to the event type + # The event is a semver release, so use the version + - name: Extract metadata (tags, labels) for Container + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: type=semver,pattern={{version}} + + # Build and push the Container image to the registry + # * Creates a multiplatform-aware image + # * Pulls build cache from the registry and pushes new cache back + - name: Build and push container image + uses: docker/build-push-action@v6 + with: + context: . 
+ file: Containerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + platforms: linux/amd64,linux/arm64 + cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache + cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache,mode=max + + + # Job to build and publish the package on PyPi as a wheel + build-wheel: + runs-on: ubuntu-latest + + steps: + # Do a non-shallow clone of the repo to ensure tags are present + # * This allows setuptools-git-versioning to automatically set the version + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup uv + uses: astral-sh/setup-uv@v3 + with: + enable-cache: true + cache-dependency-glob: "pyproject.toml" + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version-file: "pyproject.toml" + + - name: Install editable package and required dependencies + run: uv sync --no-dev + + # Building the wheel dynamically assigns the version according to git + # * The setuptools_git_versioning package reads the git tags and assigns the version + # * The version is then used in the wheel filename and made available in the package + # * setuptools_git_versioning is configured in pyproject.toml + - name: Build wheel + run: uv pip wheel . 
--no-deps --wheel-dir dist + + - name: Upload wheel + uses: actions/upload-artifact@v4 + with: + name: wheel + path: dist/*.whl + + - name: Publish wheel + uses: pypa/gh-action-pypi-publish@v1.10 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.gitignore b/.gitignore index 0557b4ea..a4a290d9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ __pycache__/ *.py[cod] *$py.class +*.pyc # C extensions *.so @@ -24,18 +25,9 @@ share/python-wheels/ *.egg-info/ .installed.cfg *.egg +*.egg-info MANIFEST -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into test_integration. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - # Unit test / coverage reports htmlcov/ .tox/ @@ -51,69 +43,17 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy +_trial_temp/ +**/_trial_temp.lock # Sphinx documentation docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# PDM -.pdm.toml - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py +docs/ # Environments -.env .venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site +uv.lock +.env # mypy .mypy_cache/ @@ -123,21 +63,12 @@ dmypy.json # ruff .ruff_cache/ -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - # Cython debug symbols cython_debug/ -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -.idea/ +# Code Editors +.idea +.fleet # Downloaded files downloads @@ -146,10 +77,6 @@ zarr /testing **.idx -# S3 mocking -s3: - # MacOS .DS_Store **/*.swp - diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md deleted file mode 100644 index ecc26730..00000000 --- a/ARCHITECTURE.md +++ /dev/null @@ -1,75 +0,0 @@ -# Architecture - -This document describes the high level architecture of the nwp-consumer project. 
- -## Birds-eye view - -```mermaid -flowchart - subgraph "Hexagonal Architecture" - - subgraph "NWP Consumer" - subgraph "Ports" - portFI(FetcherInterface) --- core - core --- portSI(StorageInterface) - - subgraph "Core" - core{{Domain Logic}} - end - end - end - - subgraph "Driving Adaptors" - i1{ICON} --implements--> portFI - i2{ECMWF} --implements--> portFI - i3{MetOffice} --implements--> portFI - end - - subgraph "Driven Adaptors" - portSI --- o1{S3} - portSI --- o2{Huggingface} - portSI --- o3{LocalFS} - end - - end -``` - -At the top level, the consumer downloads raw NWP data, processes it to zarr, and saves it to a storage backend. - -It is built following the hexagonal architecture pattern. -This pattern is used to separate the core business logic from the driving and driven adaptors. -The core business logic is the `service` module, which contains the domain logic. -This logic is agnostic to the driving and driven actors, -instead relying on abstract classes as the ports to interact with them. - - -## Entry Points - -`src/nwp_consumer/cmd/main.py` contains the main function which runs the consumer. - -`src/nwp_consumer/internal/service/consumer.py` contains the `NWPConsumer` class, -the methods of which are the business use cases of the consumer. - -`StorageInterface` and `FetcherInterface` classes define the ports used by driving and driven actors. - -`src/nwp_consumer/internal/inputs` contains the adaptors for the driving actors. - -`src/nwp_consumer/internal/outputs` contains the adaptors for the driven actors. - -## Core - -The core business logic is contained in the `service` module. -According to the hexagonal pattern, the core logic is agnostic to the driving and driven actors. -As such, there is an internal data representation of the NWP data that the core logic acts upon. -Due to the multidimensional data of the NWP data, it is hard to define a schema for this. - -Internal data is stored an xarray dataset. 
-This dataset effectively acts as an array of `DataArrays` for each parameter or variable. -It should have the following dimensions and coordinates: - -- `time` dimension -- `step` dimension -- `latitude` or `x` dimension -- `longitude` or `y` dimension - -Parameters should be stored as DataArrays in the dataset. \ No newline at end of file diff --git a/Containerfile b/Containerfile index eea07b30..9c4cd8ae 100644 --- a/Containerfile +++ b/Containerfile @@ -1,36 +1,153 @@ -# Build a virtualenv using miniconda -# * Install required compilation tools for wheels via apt -# * Install required non-python binaries via conda +# POTENTIAL FOR SMALLER CONTAINERFILE IF THIS CAN BE GOT WORKING + + +# # --- Base Python image --------------------------------------------------------------- +# FROM python:3.12-bookworm AS python-base +# +# --- Builder image creation ------------------------------------------------------------- +# FROM python-base AS builder +# +# Setup non-root user +# ARG USER=monty +# RUN groupadd ${USER} && useradd -m ${USER} -g ${USER} +# USER ${USER} +# ENV PATH="/home/${USER}/.local/bin:${PATH}" +# +# WORKDIR /home/${USER} +# +# Don't generate .pyc, enable tracebacks +# ENV LANG=C.UTF-8 \ +# LC_ALL=C.UTF-8 \ +# PYTHONDONTWRITEBYTECODE=1 \ +# PYTHONFAULTHANDLER=1 +# +# # COPY --from=ghcr.io/astral-sh/uv:python3.12-bookworm --chown=1000:1000 /usr/local/bin/uv /home/${USER}/.local/bin/uv +# COPY --from=ghcr.io/astral-sh/uv:python3.12-bookworm /usr/local/bin/uv /usr/local/bin/uv +# +# RUN uv --version +# +# # --- Distroless Container creation ----------------------------------------------------- +# FROM gcr.io/distroless/cc-debian12 AS python-distroless +# +# ARG CHIPSET_ARCH=aarch64-linux-gnu +# +# # Copy the python installation from the base image +# COPY --from=python-base /usr/local/lib/ /usr/local/lib/ +# COPY --from=python-base /usr/local/bin/python /usr/local/bin/python +# COPY --from=python-base /etc/ld.so.cache /etc/ld.so.cache +# +# # Add common 
compiled libraries +# COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libz.so.1 /usr/lib/${CHIPSET_ARCH}/ +# COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libffi* /usr/lib/${CHIPSET_ARCH}/ +# # COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libbz2.so.1.0 /usr/lib/${CHIPSET_ARCH}/ +# # COPY --from=python-base /lib/${CHIPSET_ARCH}/libm.so.6 /lib/${CHIPSET_ARCH}/ +# COPY --from=python-base /usr/lib/${CHIPSET_ARCH}/libc.so.6 /usr/lib/${CHIPSET_ARCH}/ +# +# Create non root user +# ARG USER=monty +# COPY --from=python-base /bin/echo /bin/echo +# COPY --from=python-base /bin/rm /bin/rm +# COPY --from=python-base /bin/sh /bin/sh +# +# RUN echo "${USER}:x:1000:${USER}" >> /etc/group +# RUN echo "${USER}:x:1001:" >> /etc/group +# RUN echo "${USER}:x:1000:1001::/home/${USER}:" >> /etc/passwd +# +# Check python installation works +# RUN python --version +# RUN rm /bin/sh /bin/echo /bin/rm +# +# Don't generate .pyc, enable tracebacks +# ENV LANG=C.UTF-8 \ +# LC_ALL=C.UTF-8 \ +# PYTHONDONTWRITEBYTECODE=1 \ +# PYTHONFAULTHANDLER=1 +# +# # --- Build the application ------------------------------------------------------------- +# FROM builder AS build-app +# +# WORKDIR /app +# +# # Install dependencies using system python +# ENV UV_LINK_MODE=copy \ +# UV_COMPILE_BYTECODE=1 \ +# UV_PYTHON_DOWNLOADS=never \ +# UV_NO_CACHE=1 \ +# CFLAGS="-g0 -Wl,--strip-all" +# +# # Synchronize DEPENDENCIES without the application itself. +# # This layer is cached until pyproject.toml changes. +# # Delete any unwanted parts of the installed packages to reduce size +# RUN --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ +# echo "Installing dependencies" && \ +# mkdir src && \ +# uv sync --no-dev --no-install-project && uv run python -m eccodes selfcheck +# # echo "Optimizing site-packages" && \ +# # rm -r .venv/.local/lib/python3.12/site-packages/**/tests && \ +# # du -h .venv/.local/lib/python3.12/site-packages | sort -h | tail -n 4 +# +# COPY . . 
+# +# RUN python -m eccodes selfcheck +# +# # --- Distroless App image -------------------------------------------------------------- +# FROM python-distroless +# +# COPY --from=build-app /usr/local /usr/local +# +# ENV RAWDIR=/work/raw \ +# ZARRDIR=/work/data +# +# ENTRYPOINT ["nwp-consumer-cli"] +# VOLUME /work +# STOPSIGNAL SIGINT + + +# WORKING CONTAINERFILE + + FROM quay.io/condaforge/miniforge3:latest AS build-venv -RUN apt -qq update && apt -qq install -y build-essential -RUN conda create -p /venv python=3.12 -RUN /venv/bin/pip install --upgrade -q pip wheel setuptools -# Install packages into the virtualenv as a separate step -# * Only re-execute this step when the requirements files change -FROM build-venv AS build-reqs -WORKDIR /app -COPY pyproject.toml pyproject.toml -RUN conda install -p /venv -q -y eccodes zarr -RUN /venv/bin/pip install -q . --no-cache-dir --no-binary=nwp-consumer - -# Build binary for the package -# * The package is versioned via setuptools_git_versioning -# hence the .git directory is required -# * The README.md is required for the long description -FROM build-reqs AS build-app -COPY src src -COPY .git .git -COPY README.md README.md -RUN /venv/bin/pip install . -RUN rm -r /venv/share/eccodes/definitions/bufr -RUN rm -r /venv/lib/python3.12/site-packages/pandas/tests +COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv + +ENV UV_LINK_MODE=copy \ + UV_COMPILE_BYTECODE=1 \ + UV_PYTHON_DOWNLOADS=never \ + UV_LINK_MODE=copy \ + UV_PYTHON=python3.12 \ + UV_PROJECT_ENVIRONMENT=/venv +COPY pyproject.toml /_lock/ + +# Synchronize DEPENDENCIES without the application itself. +# This layer is cached until uv.lock or pyproject.toml change. 
+# Delete any unwanted parts of the installed packages to reduce size +RUN apt-get -qq update && apt-get -qq -y install gcc && \ + echo "Creating virtualenv at /venv" && \ + conda create --quiet --yes -p /venv python=3.12 eccodes +RUN echo "Installing dependencies into /venv" && \ + cd /_lock && \ + mkdir src && \ + uv sync --no-dev --no-install-project && \ + echo "Optimizing /venv site-packages" && \ + rm -r /venv/lib/python3.12/site-packages/**/tests && \ + rm -r /venv/lib/python3.12/site-packages/**/_*cache* && \ + rm -r /venv/share/eccodes/definitions/bufr + +# Then install the application itself +# * Delete the test and cache folders from installed packages to reduce size +COPY . /src +RUN uv pip install --no-deps --python=$UV_PROJECT_ENVIRONMENT /src # Copy the virtualenv into a distroless image # * These are small images that only contain the runtime dependencies FROM gcr.io/distroless/python3-debian11 WORKDIR /app -COPY --from=build-app /venv /venv -HEALTHCHECK CMD ["/venv/bin/nwp-consumer", "check"] -ENTRYPOINT ["/venv/bin/nwp-consumer"] -VOLUME /tmp/nwpc +COPY --from=build-venv /venv /venv + +ENV RAWDIR=/work/raw \ + ZARRDIR=/work/data \ + ECCODES_DEFINITION_PATH=/venv/share/eccodes/definitions + +ENTRYPOINT ["/venv/bin/nwp-consumer-cli"] +VOLUME /work +STOPSIGNAL SIGINT diff --git a/README.md b/README.md index 6b7835df..4c15d748 100644 --- a/README.md +++ b/README.md @@ -1,281 +1,131 @@ -