diff --git a/.cruft.json b/.cruft.json index 8f978c3c..ba613823 100644 --- a/.cruft.json +++ b/.cruft.json @@ -1,6 +1,6 @@ { - "template": "/home/tjs/git/cookiecutter-pypackage", - "commit": "f391bbd6ee14ab2478c64a1f78b74bd9903cae81", + "template": "https://github.com/Ouranosinc/cookiecutter-pypackage", + "commit": "14556700478b0afdb158d61dd35db26a77c2b83d", "checkout": null, "context": { "cookiecutter": { @@ -11,7 +11,7 @@ "project_slug": "xscen", "project_short_description": "A climate change scenario-building analysis framework, built with xclim/xarray.", "pypi_username": "RondeauG", - "version": "0.9.0", + "version": "0.10.1-dev.1", "use_pytest": "y", "use_black": "y", "use_conda": "y", @@ -20,9 +20,10 @@ "add_translations": "y", "command_line_interface": "Click", "create_author_file": "y", - "open_source_license": "Not open source", + "open_source_license": "Apache Software License 2.0", "generated_with_cruft": "y", - "_template": "/home/tjs/git/cookiecutter-pypackage" + "__gh_slug": "https://github.com/Ouranosinc/xscen", + "_template": "https://github.com/Ouranosinc/cookiecutter-pypackage" } }, "directory": null diff --git a/.flake8 b/.flake8 index 12028909..3a5dd7c2 100644 --- a/.flake8 +++ b/.flake8 @@ -6,9 +6,6 @@ exclude = docs/conf.py, tests ignore = - AZ100, - AZ200, - AZ300, C, D, E, diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 93ae5e0f..98b7d72e 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -6,7 +6,7 @@ - [ ] (If applicable) Documentation has been added / updated (for bug fixes / features). - [ ] (If applicable) Tests have been added. - [ ] This PR does not seem to break the templates. -- [ ] CHANGES.rst has been updated (with summary of main changes). +- [ ] CHANGELOG.rst has been updated (with summary of main changes). - [ ] Link to issue (:issue:`number`) and pull request (:pull:`number`) has been added. ### What kind of change does this PR introduce? 
diff --git a/.github/deactivated/actions-versions-updater.yml b/.github/deactivated/actions-versions-updater.yml deleted file mode 100644 index 88c0a129..00000000 --- a/.github/deactivated/actions-versions-updater.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: GitHub Actions Version Updater - -on: - schedule: - # 12:00 AM on the first of every month - - cron: '0 0 1 * *' - workflow_dispatch: - -permissions: - contents: read - -jobs: - build: - runs-on: ubuntu-latest - permissions: - actions: write - contents: write - pull-requests: write - steps: - - name: Harden Runner - uses: step-security/harden-runner@eb238b55efaa70779f274895e782ed17c84f2895 # v2.6.1 - with: - disable-sudo: true - egress-policy: block - allowed-endpoints: > - api.github.com:443 - github.com:443 - - - uses: actions/checkout@v4.1.1 - with: - # This requires a personal access token with the privileges to push directly to `main` - token: ${{ secrets.ACTIONS_VERSION_UPDATER_TOKEN }} - persist-credentials: true - - - name: Run GitHub Actions Version Updater - uses: saadmk11/github-actions-version-updater@v0.8.1 - with: - token: ${{ secrets.ACTIONS_VERSION_UPDATER_TOKEN }} - committer_email: 'bumpversion[bot]@ouranos.ca' - committer_username: 'update-github-actions[bot]' - pull_request_title: '[bot] Update GitHub Action Versions' diff --git a/.github/deactivated/conda-build.yml b/.github/deactivated/conda-build.yml deleted file mode 100644 index b30dc882..00000000 --- a/.github/deactivated/conda-build.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: Build and upload conda packages - -on: - release: - types: - - released - - prereleased - workflow_dispatch: - inputs: - tag: - description: 'Tag to be built and uploaded' - required: true - type: string - label: - description: 'The type of release' - default: 'dev' - type: choice - options: - - dev - - main - -jobs: - conda_deployment_with_tag: - name: Build conda package with Python${{ matrix.python-version }} - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.9"] - steps: - - uses: actions/checkout@v4.1.1 - if: ${{ github.event.inputs.tag == '' }} - - uses: actions/checkout@v4.1.1 - if: ${{ github.event.inputs.tag != '' }} - with: - fetch-depth: 0 - ref: ${{ inputs.tag }} - - name: Setup Conda (Micromamba) with Python${{ matrix.python-version }} - uses: mamba-org/provision-with-micromamba@v16 - with: - cache-downloads: true - channels: conda-forge,defaults - extra-specs: | - python=${{ matrix.python-version }} - anaconda-client - conda-build - - name: Conditionally set label - uses: haya14busa/action-cond@v1.1.1 - id: label - with: - cond: ${{ github.event_name == 'workflow_dispatch' }} - if_true: ${{ github.event.inputs.label }} - if_false: "auto" - - name: Build and upload the conda packages - uses: uibcdf/action-build-and-upload-conda-packages@v1.2.0 - with: - meta_yaml_dir: conda/xscen - python-version: ${{ matrix.python-version }} - user: Ouranosinc - label: ${{ steps.label.outputs.value }} - token: ${{ secrets.ANACONDA_TOKEN }} diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a9f28754..053707cf 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,15 +1,22 @@ +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + version: 2 updates: - package-ecosystem: github-actions - directory: / + directory: /.github/workflows schedule: - interval: daily - time: '12:00' - open-pull-requests-limit: 10 + 
interval: monthly + groups: + actions: + patterns: + - "*" - package-ecosystem: pip directory: / schedule: - interval: daily - time: '12:00' - open-pull-requests-limit: 10 + interval: monthly + groups: + python: + patterns: + - "*" diff --git a/.github/labeler.yml b/.github/labeler.yml index 6d75ce34..cd74ab70 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -18,17 +18,22 @@ - '.readthedocs.yml' - '.secrets.baseline' - '.yamllint.yaml' + - 'CI/**/*' - 'Makefile' - 'docs/Makefile' - 'tox.ini' +# label 'docs' all documentation-related steps and files 'docs': - changed_files: - any-glob-to-any-file: + - '.github/DISCUSSION_TEMPLATE/**/*' - '.github/ISSUE_TEMPLATE.md' - '.github/ISSUE_TEMPLATE/**/*' - '.github/PULL_REQUEST_TEMPLATE.md' - '.readthedocs.yml' + - 'AUTHORS.rst' + - 'CODE_OF_CONDUCT.md' - 'CONTRIBUTING.rst' - 'README.rst' - 'docs/**/*' diff --git a/.github/workflows/bump-version.yml b/.github/workflows/bump-version.yml index daefc6fb..6037c9ba 100644 --- a/.github/workflows/bump-version.yml +++ b/.github/workflows/bump-version.yml @@ -1,9 +1,4 @@ -# This workflow requires a personal access token named `BUMP_VERSION_TOKEN` with the following privileges: -# - Contents: Read and Write -# - Metadata: Read-Only -# - Pull Requests: Read and Write - -name: "Bump Patch Version" +name: Bump Patch Version on: push: @@ -21,7 +16,11 @@ on: - .secrets.baseline - .yamllint.yaml - AUTHORS.rst - - CHANGES.rst + - CHANGELOG.rst + - CI/**/*.in + - CI/**/*.py + - CI/**/*.txt + - CODE_OF_CONDUCT.md - CONTRIBUTING.rst - MANIFEST.in - Makefile @@ -32,11 +31,10 @@ on: - environment-dev.yml - environment.yml - pyproject.toml - - setup.py + - src/xscen/__init__.py - templates - tests/*.py - tox.ini - - xscen/__init__.py permissions: contents: read @@ -47,35 +45,47 @@ jobs: runs-on: ubuntu-latest permissions: actions: read - contents: write steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: disable-sudo: true egress-policy: block allowed-endpoints: > + api.github.com:443 files.pythonhosted.org:443 github.com:443 pypi.org:443 - - uses: actions/checkout@v4.1.3 + - name: Generate App Token + id: token_generator + uses: actions/create-github-app-token@5d869da34e18e7287c1daad50e0b8ea0f506ce69 # v1.11.0 + with: + app-id: ${{ secrets.OURANOS_HELPER_BOT_ID }} + private-key: ${{ secrets.OURANOS_HELPER_BOT_KEY }} + - name: Checkout Repository + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: - persist-credentials: false - - uses: actions/setup-python@v5.1.0 + token: ${{ steps.token_generator.outputs.token }} + - name: Set up Python3 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: "3.x" - - name: Config Commit Bot - run: | - git config --local user.email "bumpversion[bot]@ouranos.ca" - git config --local user.name "bumpversion[bot]" - - name: Install bump-my-version - run: | - python -m pip install "bump-my-version>=0.18.3" + - name: Import GPG Key + uses: crazy-max/ghaction-import-gpg@01dd5d3ca463c7f10f7f4f7b4f177225ac661ee4 # v6.1.0 + with: + gpg_private_key: ${{ secrets.OURANOS_HELPER_BOT_GPG_PRIVATE_KEY }} + passphrase: ${{ secrets.OURANOS_HELPER_BOT_GPG_PRIVATE_KEY_PASSWORD }} + git_user_signingkey: true + git_commit_gpgsign: true + trust_level: 5 - name: Current Version run: | - bump-my-version show current_version - CURRENT_VERSION="$(grep -E '__version__' 
xscen/__init__.py | cut -d ' ' -f3)" + CURRENT_VERSION="$(grep -E '__version__' src/xscen/__init__.py | cut -d ' ' -f3)" + echo "current_version=${CURRENT_VERSION}" echo "CURRENT_VERSION=${CURRENT_VERSION}" >> $GITHUB_ENV + - name: Install CI libraries + run: | + python -m pip install --require-hashes -r CI/requirements_ci.txt - name: Conditional Bump Version run: | if [[ ${{ env.CURRENT_VERSION }} =~ -dev(\.\d+)? ]]; then @@ -85,10 +95,11 @@ jobs: echo "Version is stable, bumping 'patch' version" bump-my-version bump patch fi - bump-my-version show-bump + NEW_VERSION="$(grep -E '__version__' src/xscen/__init__.py | cut -d ' ' -f3)" + echo "new_version=${NEW_VERSION}" + echo "NEW_VERSION=${NEW_VERSION}" >> $GITHUB_ENV - name: Push Changes - uses: ad-m/github-push-action@v0.8.0 + uses: ad-m/github-push-action@d91a481090679876dfc4178fef17f286781251df # v0.8.0 with: force: false - github_token: ${{ secrets.BUMP_VERSION_TOKEN }} branch: ${{ github.ref }} diff --git a/.github/workflows/cache-cleaner.yml b/.github/workflows/cache-cleaner.yml index 30da035c..c9300085 100644 --- a/.github/workflows/cache-cleaner.yml +++ b/.github/workflows/cache-cleaner.yml @@ -16,7 +16,7 @@ jobs: actions: write steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: disable-sudo: true egress-policy: block @@ -25,7 +25,8 @@ jobs: github.com:443 objects.githubusercontent.com:443 - - uses: actions/checkout@v4.1.3 + - name: Checkout Repository + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - name: Cleanup run: | diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d068c211..f0a57555 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -4,6 +4,7 @@ on: push: branches: - main + pull_request: schedule: - cron: '30 23 * * 5' @@ -25,7 +26,7 @@ jobs: - 'python' steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: disable-sudo: true egress-policy: block @@ -37,7 +38,7 @@ jobs: pypi.org:443 uploads.github.com:443 - name: Checkout repository - uses: actions/checkout@v4.1.3 + uses: actions/checkout@v4.2.1 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@codeql-bundle-20230524 diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 97bbc97e..39e3eafb 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -1,6 +1,7 @@ # Dependency Review Action # -# This Action will scan dependency manifest files that change as part of a Pull Request, surfacing known-vulnerable versions of the packages declared or updated in the PR. Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable packages will be blocked from merging. +# This Action will scan dependency manifest files that change as part of a Pull Request, surfacing known-vulnerable versions of the packages declared or updated in the PR. +# Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable packages will be blocked from merging. 
# # Source repository: https://github.com/actions/dependency-review-action # Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement @@ -16,7 +17,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: disable-sudo: true egress-policy: block @@ -25,8 +26,8 @@ jobs: api.github.com:443 github.com:443 - - name: 'Checkout Repository' - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 + - name: Checkout Repository + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - - name: 'Dependency Review' - uses: actions/dependency-review-action@0659a74c94536054bfa5aeb92241f70d680cc78e + - name: Dependency Review + uses: actions/dependency-review-action@5a2ce3f5b92ee19cbb1541a4984c76d921601d7c # v4.3.4 diff --git a/.github/workflows/first-pull-request.yml b/.github/workflows/first-pull-request.yml index 77324ca9..4bd31a3f 100644 --- a/.github/workflows/first-pull-request.yml +++ b/.github/workflows/first-pull-request.yml @@ -13,18 +13,18 @@ jobs: name: Welcome runs-on: ubuntu-latest permissions: - contents: read pull-requests: write steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: disable-sudo: true egress-policy: block allowed-endpoints: > api.github.com:443 - - uses: actions/github-script@v7.0.1 + - name: Verify Pull Request Opener + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 with: script: | // Get a list of all issues created by the PR opener @@ -56,5 +56,5 @@ jobs: It appears that this is your first Pull Request. To give credit where it's due, we ask that you add your information to the \`AUTHORS.rst\` and \`.zenodo.json\`: - [ ] The relevant author information has been added to \`AUTHORS.rst\` and \`.zenodo.json\`. - Please make sure you've read our [contributing guide](CONTRIBUTING.rst). We look forward to reviewing your Pull Request shortly ✨` + Please make sure you've read our [contributing guide](https://github.com/Ouranosinc/xscen/blob/main/CONTRIBUTING.rst). We look forward to reviewing your Pull Request shortly ✨` }) diff --git a/.github/workflows/label.yml b/.github/workflows/label.yml index 570d63b2..d7b02305 100644 --- a/.github/workflows/label.yml +++ b/.github/workflows/label.yml @@ -6,7 +6,7 @@ # https://github.com/actions/labeler/blob/master/README.md name: Labeler -on: [pull_request_target] +on: [ pull_request_target ] # Note: potential security risk from this action using pull_request_target. # Do not add actions in here which need a checkout of the repo, and do not use any caching in here. 
# See: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target @@ -19,18 +19,17 @@ jobs: name: Label runs-on: ubuntu-latest permissions: - checks: write - contents: read pull-requests: write steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: disable-sudo: true egress-policy: block allowed-endpoints: > api.github.com:44 - - uses: actions/labeler@v5.0.0 + - name: Label Pull Request + uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0 with: repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 37a65426..83a534af 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -6,12 +6,10 @@ on: - main paths-ignore: - .cruft.json - - CHANGES.rst + - CHANGELOG.rst - README.rst - pyproject.toml - - setup.cfg - - setup.py - - xscen/__init__.py + - src/xscen/__init__.py pull_request: concurrency: @@ -32,7 +30,7 @@ jobs: - "3.x" steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: disable-sudo: true egress-policy: block @@ -40,66 +38,69 @@ jobs: files.pythonhosted.org:443 github.com:443 pypi.org:443 - - uses: actions/checkout@v4.1.3 - - uses: actions/setup-python@v5.1.0 + - name: Checkout Repository + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + - name: Setup Python${{ matrix.python-version }} + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: - python-version: "3.x" - - name: Install tox + python-version: ${{ matrix.python-version }} + cache: pip + - name: Install CI libraries run: | - python -m pip install tox + python -m pip install --require-hashes -r CI/requirements_ci.txt - name: Run linting suite run: | python -m tox -e lint test-pypi: - name: ${{ matrix.tox-build }} (Python${{ matrix.python-version }}) + name: Test with Python${{ matrix.python-version }} (tox, ${{ matrix.os }}) needs: lint - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} env: COVERALLS_PARALLEL: true COVERALLS_SERVICE_NAME: github esmf-version: 8.4.2 strategy: matrix: - include: - - python-version: "3.9" - tox-build: "py39-coveralls" - - python-version: "3.10" - tox-build: "py310-coveralls" - - python-version: "3.11" - tox-build: "py311-coveralls" - - python-version: "3.12" - tox-build: "py312-esmpy-coveralls" + os: [ 'ubuntu-latest' ] + python-version: [ "3.10", "3.11", "3.12" ] # "3.13" defaults: run: shell: bash -l {0} steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - - uses: actions/checkout@v4.1.3 + - name: Checkout Repository + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - name: Setup Conda (Micromamba) with Python ${{ matrix.python-version }} - uses: mamba-org/setup-micromamba@v1.8.1 + uses: mamba-org/setup-micromamba@617811f69075e3fd3ae68ca64220ad065877f246 # v2.0.0 with: cache-downloads: true environment-name: xscen-pypi create-args: >- esmf=${{ env.esmf-version }} - mamba python=${{ matrix.python-version }} - tox + tox>=4.17.1 + tox-gh>=1.3.2 + # FIXME: 
https://github.com/mamba-org/setup-micromamba/issues/225 + micromamba-version: "1.5.10-0" # pinned to avoid the breaking changes with mamba and micromamba (2.0.0). + - name: Environment Caching + uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1 + with: + path: .tox + key: ${{ matrix.os }}-Python${{ matrix.python-version }}-${{ hashFiles('pyproject.toml', 'tox.ini') }} - name: Test with tox run: | - python -m tox -e ${{ matrix.tox-build }} + python -m tox env: ESMF_VERSION: ${{ env.esmf-version }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COVERALLS_FLAG_NAME: run-Python${{ matrix.python-version }} + COVERALLS_PARALLEL: true + COVERALLS_SERVICE_NAME: github - # - name: Compile language catalogs -# run: | -# make translate # - name: Install esmpy # run: | # pip install git+https://github.com/esmf-org/esmf.git@v${{ matrix.esmf-version }}#subdirectory=src/addon/esmpy @@ -124,40 +125,35 @@ jobs: # COVERALLS_SERVICE_NAME: github test-conda: - name: Python${{ matrix.python-version }} (conda) + name: Test with Python${{ matrix.python-version }} (Anaconda, ${{ matrix.os }}) needs: lint - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: - include: - - python-version: "3.9" - - python-version: "3.10" - - python-version: "3.11" - - python-version: "3.12" + os: [ 'ubuntu-latest' ] + python-version: [ "3.10", "3.11", "3.12" ] # "3.13" defaults: run: shell: bash -l {0} steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - - uses: actions/checkout@v4.1.3 + - name: Checkout Repository + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - name: Setup Conda (Micromamba) with Python ${{ matrix.python-version }} - uses: mamba-org/setup-micromamba@v1.8.1 + uses: mamba-org/setup-micromamba@617811f69075e3fd3ae68ca64220ad065877f246 # v2.0.0 with: cache-downloads: true - cache-environment: true + cache-environment: false # FIXME: No environment caching until issues with micromamba 2.0.0 are resolved. environment-file: environment-dev.yml create-args: >- python=${{ matrix.python-version }} - - name: Downgrade intake-esm - if: matrix.python-version == '3.9' - run: | - micromamba install -y -c conda-forge intake-esm=2023.11.10 + # FIXME: https://github.com/mamba-org/setup-micromamba/issues/225 + micromamba-version: "1.5.10-0" # pinned to avoid the breaking changes with mamba and micromamba (2.0.0). - name: Conda and Mamba versions run: | - micromamba list echo "micromamba $(micromamba --version)" - name: Compile catalogs and install xscen run: | @@ -165,12 +161,12 @@ jobs: python -m pip install --no-user --no-deps . 
- name: Check versions run: | - conda list + micromamba list python -m pip check || true - name: Test with pytest run: | - python -m pytest --cov xscen - - name: Report coverage + python -m pytest + - name: Report Coverage run: | python -m coveralls env: @@ -184,12 +180,13 @@ jobs: - test-pypi - test-conda runs-on: ubuntu-latest - container: python:3-slim steps: + - name: Harden Runner + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 + with: + disable-sudo: true + egress-policy: audit - name: Coveralls Finished - run: | - python -m pip install --upgrade coveralls - python -m coveralls --finish - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - COVERALLS_SERVICE_NAME: github + uses: coverallsapp/github-action@643bc377ffa44ace6394b2b5d0d3950076de9f63 # v2.3.0 + with: + parallel-finished: true diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 60faabf6..630926aa 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -18,7 +18,7 @@ jobs: id-token: write steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: disable-sudo: true egress-policy: block @@ -27,17 +27,17 @@ jobs: github.com:443 pypi.org:443 upload.pypi.org:443 - - uses: actions/checkout@v4.1.3 + - name: Checkout Repository + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - name: Set up Python3 - uses: actions/setup-python@v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: "3.x" - - name: Install packaging libraries + - name: Install CI libraries run: | - python -m pip install babel build setuptools wheel + python -m pip install --require-hashes -r CI/requirements_ci.txt - name: Build a binary wheel and a source tarball run: | - make translate python -m build --sdist --wheel - name: Publish distribution 📦 to PyPI - uses: pypa/gh-action-pypi-publish@v1.8.14 + uses: pypa/gh-action-pypi-publish@f7600683efdcb7656dec5b29656edb7bc586e597 # v1.10.3 diff --git a/.github/workflows/remove-obsolete-cache.yml b/.github/workflows/remove-obsolete-cache.yml deleted file mode 100644 index 665f7cd4..00000000 --- a/.github/workflows/remove-obsolete-cache.yml +++ /dev/null @@ -1,45 +0,0 @@ -# Example taken from https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#managing-caches -name: Cleanup Caches on PR Merge - -on: - pull_request: - types: - - closed - -permissions: - contents: read - -jobs: - cleanup: - runs-on: ubuntu-latest - steps: - - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 - with: - disable-sudo: true - egress-policy: block - allowed-endpoints: > - api.github.com:443 - github.com:443 - objects.githubusercontent.com:443 - - - name: Check out code - uses: actions/checkout@v4.1.3 - - - name: Cleanup - run: | - gh extension install actions/gh-actions-cache - REPO=${{ github.repository }} - BRANCH="refs/pull/${{ github.event.pull_request.number }}/merge" - echo "Fetching list of cache key" - cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH -L 100 | cut -f 1 ) - ## Setting this to not fail the workflow while deleting cache keys. - set +e - echo "Deleting caches..." 
- for cacheKey in $cacheKeysForPR - do - gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm - done - echo "Done" - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index c1867dc7..75873809 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -29,27 +29,30 @@ jobs: id-token: write steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: disable-sudo: true egress-policy: block allowed-endpoints: > api.github.com:443 api.osv.dev:443 + api.scorecard.dev:443 api.securityscorecards.dev:443 fulcio.sigstore.dev:443 github.com:443 + index.docker.io:443 oss-fuzz-build-logs.storage.googleapis.com:443 rekor.sigstore.dev:443 tuf-repo-cdn.sigstore.dev:443 www.bestpractices.dev:443 - - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f + - name: Checkout Repository + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: persist-credentials: false - name: Run analysis - uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 + uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0 with: results_file: results.sarif results_format: sarif @@ -69,7 +72,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: Upload artifact - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: SARIF file path: results.sarif @@ -77,6 +80,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: Upload to code-scanning - uses: github/codeql-action/upload-sarif@e5f05b81d5b6ff8cfa111c80c22c5fd02a384118 # 3.23.0 + uses: github/codeql-action/upload-sarif@4dd16135b69a43b6c8efb853346f8437d92d3c93 # 3.26.6 with: sarif_file: results.sarif diff --git a/.github/workflows/tag-testpypi.yml b/.github/workflows/tag-testpypi.yml index b14d7557..d3b9d5ce 100644 --- a/.github/workflows/tag-testpypi.yml +++ b/.github/workflows/tag-testpypi.yml @@ -17,13 +17,13 @@ jobs: contents: write steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - name: Checkout code - uses: actions/checkout@v4.1.3 + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - name: Create Release - uses: softprops/action-gh-release@v2 + uses: softprops/action-gh-release@c062e08bd532815e2082a85e87e3ef29c3e6d191 # 2.0.8 env: # This token is provided by Actions, you do not need to create your own token GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -42,7 +42,7 @@ jobs: id-token: write steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: disable-sudo: true egress-policy: block @@ -51,20 +51,20 @@ jobs: github.com:443 pypi.org:443 test.pypi.org:443 - - uses: actions/checkout@v4.1.3 + - name: Checkout Repository + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - name: Set up Python3 - uses: actions/setup-python@v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: "3.x" - - name: Install packaging libraries + - name: Install CI libraries run: | - python -m pip install babel build setuptools wheel + python -m pip install --require-hashes -r CI/requirements_ci.txt - name: Build a binary wheel and a source tarball run: | - make translate python -m build --sdist --wheel - name: Publish distribution 📦 to Test PyPI - uses: pypa/gh-action-pypi-publish@v1.8.14 + uses: pypa/gh-action-pypi-publish@f7600683efdcb7656dec5b29656edb7bc586e597 # v1.10.3 with: repository-url: https://test.pypi.org/legacy/ skip-existing: true diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml index 3bc5eaae..1a15b84e 100644 --- a/.github/workflows/upstream.yml +++ b/.github/workflows/upstream.yml @@ -5,12 +5,10 @@ on: - main paths-ignore: - .cruft.json - - CHANGES.rst + - CHANGELOG.rst - README.rst - pyproject.toml - - setup.cfg - - setup.py - - xscen/__init__.py + - src/xscen/__init__.py schedule: - cron: "0 0 * * *" # Daily “At 00:00” UTC workflow_dispatch: # allows you to trigger the workflow run manually @@ -27,7 +25,6 @@ jobs: name: Python${{ matrix.python-version }}-upstream runs-on: ubuntu-latest permissions: - contents: read issues: write if: | (github.event_name == 'schedule') || @@ -43,14 +40,15 @@ jobs: shell: bash -l {0} steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: egress-policy: audit - - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 + - name: Checkout Repository + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 with: fetch-depth: 0 # Fetch all history for all branches and tags. 
- name: Setup Conda (Micromamba) with Python${{ matrix.python-version }} - uses: mamba-org/setup-micromamba@422500192359a097648154e8db4e39bdb6c6eed7 # v1.8.1 + uses: mamba-org/setup-micromamba@617811f69075e3fd3ae68ca64220ad065877f246 # v2.0.0 with: cache-downloads: true cache-environment: true @@ -83,6 +81,6 @@ jobs: && steps.status.outcome == 'failure' && github.event_name == 'schedule' && github.repository_owner == 'Ouranosinc' - uses: xarray-contrib/issue-from-pytest-log@138db94bfe4b12ac11fc1aff307ee0835feab403 # v1.2.8 + uses: xarray-contrib/issue-from-pytest-log@f94477e45ef40e4403d7585ba639a9a3bcc53d43 # v1.3.0 with: log-path: output-${{ matrix.python-version }}-log.jsonl diff --git a/.github/workflows/workflow-warning.yml b/.github/workflows/workflow-warning.yml index a4887a7a..0a1e658c 100644 --- a/.github/workflows/workflow-warning.yml +++ b/.github/workflows/workflow-warning.yml @@ -22,48 +22,54 @@ jobs: if: | (github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name) permissions: - contents: read pull-requests: write steps: - name: Harden Runner - uses: step-security/harden-runner@a4aa98b93cab29d9b1101a6143fb8bce00e2eac4 # v2.7.1 + uses: step-security/harden-runner@91182cccc01eb5e619899d80e4e971d6181294a7 # v2.10.1 with: disable-sudo: true egress-policy: block allowed-endpoints: > api.github.com:443 - - name: Find comment + - name: Find Warning Comment uses: peter-evans/find-comment@3eae4d37986fb5a8592848f6a574fdf654e61f9e # v3.1.0 - id: fc + id: fc_warning with: issue-number: ${{ github.event.pull_request.number }} comment-author: 'github-actions[bot]' body-includes: | This Pull Request modifies GitHub workflows and is coming from a fork. - - name: Create comment + - name: Create Warning Comment if: | - (steps.fc.outputs.comment-id == '') && + (steps.fc_warning.outputs.comment-id == '') && (!contains(github.event.pull_request.labels.*.name, 'approved')) && (github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name) uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0 with: - comment-id: ${{ steps.fc.outputs.comment-id }} + comment-id: ${{ steps.fc_warning.outputs.comment-id }} issue-number: ${{ github.event.pull_request.number }} body: | - > **Warning** + > [!WARNING] > This Pull Request modifies GitHub Workflows and is coming from a fork. **It is very important for the reviewer to ensure that the workflow changes are appropriate.** edit-mode: replace - - name: Update comment + - name: Find Note Comment + uses: peter-evans/find-comment@3eae4d37986fb5a8592848f6a574fdf654e61f9e # v3.1.0 + id: fc_note + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 'github-actions[bot]' + body-includes: Workflow changes in this Pull Request have been approved! + - name: Update Comment if: | contains(github.event.pull_request.labels.*.name, 'approved') uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0 with: - comment-id: ${{ steps.fc.outputs.comment-id }} + comment-id: ${{ steps.fc_note.outputs.comment-id }} issue-number: ${{ github.event.pull_request.number }} body: | - > **Note** - > Changes have been approved by a maintainer. + > [!NOTE] + > Workflow changes in this Pull Request have been approved! 
reactions: | hooray edit-mode: append diff --git a/.gitignore b/.gitignore index 3820601d..36c14155 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,9 @@ docs/notebooks/samples/example* docs/notebooks/samples/gs-weights/ !docs/notebooks/samples/tutorial/*/*/*/*/*/*/*/*.nc +# Translation stuff +*.pot + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -61,10 +64,6 @@ coverage.xml .hypothesis/ .pytest_cache/ -# Translations -*.mo -*.pot - # Django stuff: *.log local_settings.py @@ -87,6 +86,9 @@ target/ # Jupyter Notebook .ipynb_checkpoints +# Dask worker cache +dask-worker-space/ + # pyenv .python-version diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f317a4c0..c2b4443d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,19 +1,17 @@ default_language_version: - python: python3 + python: python3 repos: - repo: https://github.com/asottile/pyupgrade - rev: v3.15.2 + rev: v3.17.0 hooks: - - id: pyupgrade - args: [ '--py39-plus' ] + - id: pyupgrade + args: [ '--py310-plus' ] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - exclude: setup.cfg - id: end-of-file-fixer - exclude: setup.cfg - id: debug-statements - id: mixed-line-ending - id: check-json @@ -23,7 +21,6 @@ repos: - id: check-toml - id: check-yaml args: [ '--allow-multiple-documents' ] - exclude: conda/xscen/meta.yaml - repo: https://github.com/pre-commit/pygrep-hooks rev: v1.10.0 hooks: @@ -38,7 +35,7 @@ repos: hooks: - id: toml-sort-fix - repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.3.0 + rev: 24.10.0 hooks: - id: black exclude: ^docs/ @@ -48,35 +45,42 @@ repos: - id: isort exclude: ^docs/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.5 + rev: v0.6.9 hooks: - id: ruff + args: [ '--fix', '--show-fixes' ] + # - id: ruff-format - repo: https://github.com/pycqa/flake8 - rev: 7.0.0 + rev: 7.1.1 hooks: - id: flake8 - additional_dependencies: [ 'flake8-alphabetize', 'flake8-rst-docstrings' ] + additional_dependencies: [ 'flake8-rst-docstrings' ] args: [ '--config=.flake8' ] - repo: https://github.com/keewis/blackdoc rev: v0.3.9 hooks: - id: blackdoc - additional_dependencies: [ 'black==24.3.0' ] + additional_dependencies: [ 'black==24.4.2' ] exclude: config.py - repo: https://github.com/adrienverge/yamllint.git rev: v1.35.1 hooks: - id: yamllint args: [ '--config-file=.yamllint.yaml' ] +# - repo: https://github.com/numpy/numpydoc +# rev: v1.8.0 +# hooks: +# - id: numpydoc-validation +# exclude: ^docs/|^tests/ - repo: https://github.com/nbQA-dev/nbQA - rev: 1.8.5 + rev: 1.8.7 hooks: - id: nbqa-pyupgrade - args: [ '--py39-plus' ] - additional_dependencies: [ 'pyupgrade==3.15.0' ] + args: [ '--py310-plus' ] + additional_dependencies: [ 'pyupgrade==3.17.0' ] - id: nbqa-black - args: [ '--target-version=py39' ] - additional_dependencies: [ 'black==24.3.0' ] + args: [ '--target-version=py310' ] + additional_dependencies: [ 'black==24.8.0' ] - id: nbqa-isort additional_dependencies: [ 'isort==5.13.2' ] - repo: https://github.com/kynan/nbstripout @@ -86,13 +90,13 @@ repos: files: ".ipynb" args: [ '--extra-keys=metadata.kernelspec' ] - repo: https://github.com/Yelp/detect-secrets - rev: v1.4.0 + rev: v1.5.0 hooks: - id: detect-secrets exclude: .cruft.json|docs/notebooks args: [ '--baseline=.secrets.baseline' ] - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.28.1 + rev: 0.29.3 hooks: - id: check-github-workflows - id: check-readthedocs @@ -102,13 +106,13 @@ 
repos: - id: check-useless-excludes ci: - autofix_commit_msg: | - [pre-commit.ci] auto fixes from pre-commit.com hooks + autofix_commit_msg: | + [pre-commit.ci] auto fixes from pre-commit.com hooks - for more information, see https://pre-commit.ci - autofix_prs: true - autoupdate_branch: '' - autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' - autoupdate_schedule: quarterly - skip: [] - submodules: false + for more information, see https://pre-commit.ci + autofix_prs: true + autoupdate_branch: '' + autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' + autoupdate_schedule: monthly + skip: [ ] + submodules: false diff --git a/.readthedocs.yml b/.readthedocs.yml index 7236e47d..946770ad 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -13,7 +13,7 @@ build: - make translate - pip install . --no-deps pre_build: - - sphinx-apidoc -o docs/apidoc --private --module-first xscen + - sphinx-apidoc -o docs/apidoc --private --module-first src/xscen - env SKIP_NOTEBOOKS=1 sphinx-build -b linkcheck docs/ _build/linkcheck || true - env SKIP_NOTEBOOKS=1 sphinx-build -M gettext docs docs/_build # post_build: diff --git a/.secrets.baseline b/.secrets.baseline index f0f316cd..2c1bf127 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1,5 +1,5 @@ { - "version": "1.4.0", + "version": "1.5.0", "plugins_used": [ { "name": "ArtifactoryDetector" diff --git a/.yamllint.yaml b/.yamllint.yaml index f01a5d64..fc95e170 100644 --- a/.yamllint.yaml +++ b/.yamllint.yaml @@ -1,11 +1,39 @@ --- rules: + + brackets: + forbid: false + min-spaces-inside: 1 + max-spaces-inside: 1 + + commas: + min-spaces-after: 1 + document-start: disable + + float-values: + require-numeral-before-decimal: true + + hyphens: + max-spaces-after: 1 + + indentation: + indent-sequences: whatever + spaces: consistent + + key-duplicates: + forbid-duplicated-merge-keys: true + line-length: - max: 120 + allow-non-breakable-words: true + allow-non-breakable-inline-mappings: true + max: 225 level: warning - truthy: disable -ignore: | - conda/xscen/meta.yaml + new-lines: + type: unix + + trailing-spaces: {} + + truthy: disable diff --git a/.zenodo.json b/.zenodo.json index 9af036da..d2742764 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -44,6 +44,11 @@ "name": "Bourdeau-Goulet, Sarah-Claude", "affiliation": "Ouranos", "orcid": "0000-0002-6125-2557" + }, + { + "name": "Gauvin St-Denis, Blaise", + "affiliation": "Ouranos", + "orcid": "0009-0004-9049-2092" } ], "keywords": [ diff --git a/AUTHORS.rst b/AUTHORS.rst index c481db41..be5863df 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -23,3 +23,4 @@ Contributors * Yannick Rousseau * Marco Braun `@vindelico `_ * Sarah-Claude Bourdeau-Goulet `@sarahclaude `_ +* Blaise Gauvin St-Denis `@bstdenis `_ diff --git a/CHANGES.rst b/CHANGELOG.rst similarity index 80% rename from CHANGES.rst rename to CHANGELOG.rst index cb61b94e..26043b0c 100644 --- a/CHANGES.rst +++ b/CHANGELOG.rst @@ -2,6 +2,119 @@ Changelog ========= +v0.11.0 (unreleased) +-------------------- +Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`). + +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* ``xs.io.make_toc`` now includes the global attributes of the dataset after the information about the variables. (:pull:`473`). + +Bug fixes +^^^^^^^^^ +* ``xs.io.save_to_table`` now correctly handles the case where the input is a `DataArray` or a `Dataset` with a single variable. (:pull:`473`). 
+ +v0.10.0 (2024-09-30) +-------------------- +Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Pascal Bourgault (:user:`aulemahal`), Gabriel Rondeau-Genesse (:user:`RondeauG`), Trevor James Smith (:user:`Zeitsperre`). + +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* The `mask` argument in ``stack_drop_nans`` can now be a list of dimensions. In that case, a `dropna(how='all')` operation will be used to create the mask on-the-fly. (:pull:`450`). +* A few changes to ``clean_up``: + * The `convert_calendar` function now uses `xarray` instead of `xclim`. (:pull:`450`). + * The `attrs_to_remove` and `remove_all_attrs_except` arguments now use real regex. (:pull:`450`). + * Multiple entries can now be given for `change_attr_prefix`. (:pull:`450`). +* ``minimum_calendar`` now accepts a list as input. (:pull:`450`). +* More calendars are now recognized in ``translate_time_chunk``. (:pull:`450`). +* `new_dim` in ``unstack_dates`` is now None by default and changes depending on the frequency. It becomes `month` if the data is exactly monthly, and keeps the old default of `season` otherwise. (:pull:`450`). +* Updated the list of libraries in `show_versions` to reflect our current environment. (:pull:`450`). +* New ``xscen.catutils.patterns_from_schema`` to generate all possible patterns from a given schema (or one of xscen's defaults), to use with :py:func:`parse_directory`. (:pull:`431`). +* New ``DataCatalog.copy_files`` to copy all files of a catalog to a new destination, unzipping if needed and returning a new catalog. (:pull:`431`). +* Convenience functions ``xs.io.zip_directory`` and ``xs.io.unzip_directory`` (for zarrs). (:pull:`431`). +* New argument to ``compute_indicators``: ``rechunk_input``, to rechunk the inputs to resample-appropriate chunks before calling xclim. (:pull:`431`). +* New ``xs.indicators.get_indicator_outputs`` to retrieve what variable name(s) and frequency to expect from an xclim indicator. (:pull:`431`). +* `xscen` now supports launching tests from `pytest` with the `--numprocesses` option. See the `pytest-xdist documentation `_ for more information. (:pull:`464`). +* Conservative regridding now supports oblique mercator projections. (:pull:`467`). +* The automatic name for the weight file in ``regrid_dataset`` is now more explicit to avoid errors, but now requires `cat:id` and `cat:domain` arguments for both the source and target datasets. (:pull:`467`). + +Bug fixes +^^^^^^^^^ +* Fixed bug with reusing weights. (:issue:`411`, :pull:`414`). +* Fixed bug in `update_from_ds` when "time" is a coordinate, but not a dimension. (:pull:`417`). +* Avoid modification of mutable arguments in ``search_data_catalogs``. (:pull:`413`). +* ``ensure_correct_time`` now correctly handles cases where timesteps are missing. (:pull:`440`). +* If using the argument `tile_buffer` with a `shape` method in ``spatial.subset``, the shapefile will now be reprojected to a WGS84 grid before the buffer is applied. (:pull:`440`). +* ``maybe_unstack`` now works if the dimension name is not the default. (:pull:`450`). +* ``unstack_fill_nan`` now works if given a dictionary that contains both dimensions and coordinates. (:pull:`450`). +* ``clean_up`` no longer modifies the original dataset. (:pull:`450`). +* ``unstack_dates`` now works correctly for yearly datasets when `winter_starts_year=True`, as well as multi-year datasets. (:pull:`450`). +* Fix ``xs.catalog.concat_data_catalogs`` for catalogs that have not been searched yet. (:pull:`431`). 
+* Fix indicator computation using ``freq=2Q*`` by assuming this means a semiannual frequency anchored at the given month (pandas assumes 2 quarter steps, any of them anchored at the given month). (:pull:`431`). +* ``create_bounds_rotated_pole`` now uses the default value if the dataset has no `north_pole_grid_longitude` attribute, instead of crashing. (:pull:`455`). +* Rewrote the global tas data file with the latest HDF5/h5py to avoid errors when using h5py 3.11 and hdf5 1.14.2. (:pull:`1861`). +* Remove references to deprecated xclim functions (``convert_calendar``, ``get_calendar``) and adapt the code to support xclim 0.52.2 and its subsequent development version. (:pull:`465`). + +Breaking changes +^^^^^^^^^^^^^^^^ +* `convert_calendar` in ``clean_up`` now uses `xarray` instead of `xclim`. Keywords aren't compatible between the two, but given that `xclim` will abandon its function, no backwards compatibility was sought. (:pull:`450`). +* `attrs_to_remove` and `remove_all_attrs_except` in ``clean_up`` now use real regex. It should not be too breaking since a `fullmatch()` is used, but `*` is now `.*`. (:pull:`450`). +* Python 3.9 is no longer supported. (:pull:`456`). +* Functions and arguments that were deprecated in `xscen` v0.8.0 or earlier have been removed. (:pull:`461`). +* `pytest-xdist` is now a development dependency. (:pull:`464`). +* ``xs.regrid.create_bounds_rotated_pole`` has been renamed to ``xs.regrid.create_bounds_gridmapping``. (:pull:`467`). +* The `weights_location` argument in ``regrid_dataset`` is no longer positional. (:pull:`467`). +* The ``xs.regrid.create_mask`` function now requires explicit arguments instead of a dictionary. (:pull:`467`). + +Internal changes +^^^^^^^^^^^^^^^^ +* ``DataCatalog.to_dataset`` can now accept a ``preprocess`` argument even if ``create_ensemble_on`` is given. The user assumes responsibility for calendar handling. (:pull:`431`). +* Include domain in `weight_location` in ``regrid_dataset``. (:pull:`414`). +* Added pins to `xarray`, `xclim`, `h5py`, and `netcdf4`. (:pull:`414`). +* Add ``.zip`` and ``.zarr.zip`` as possible file extensions for Zarr datasets. (:pull:`426`). +* Explicitly assign coords of multiindex in `xs.unstack_fill_nan`. (:pull:`427`). +* French translations are compiled offline. A new check ensures no PRs are merged with missing messages. (:issue:`342`, :pull:`443`). +* Continued work to add tests. (:pull:`450`). +* Updated the cookiecutter template via `cruft`: (:pull:`452`) + * GitHub Workflows that rely on `PyPI`-based dependencies now use commit hashes. + * `Dependabot` will now group updates by type. + * Dependencies have been updated and synchronized. + * Contributor guidance documentation has been adjusted. + * `numpydoc-validate` has been added to the linting tools. + * Linting checks are more reliant on `ruff` suggestions and stricter. + * `flake8-alphabetize` has been replaced by `ruff`. + * License information has been updated in the library top-level `__init__.py`. +* Docstrings have been adjusted to meet the `numpydoc` standard. (:pull:`452`). + +CI changes +^^^^^^^^^^ +* The `bump-version.yml` workflow now uses the Ouranosinc GitHub Helper Bot to sign bump version commits. (:pull:`462`). + +v0.9.1 (2024-06-04) +------------------- +Contributors to this version: Pascal Bourgault (:user:`aulemahal`), Trevor James Smith (:user:`Zeitsperre`), Juliette Lavoie (:user:`juliettelavoie`). + +Breaking changes +^^^^^^^^^^^^^^^^ +* `xscen` now uses a `src layout `_ in lieu of a flat layout. (:pull:`407`). 
+ +Bug fixes +^^^^^^^^^ +* Fixed defaults for ``xr_combine_kwargs`` in ``extract_dataset`` (:pull:`402`). +* Fixed bug with `xs.utils.update_attr`(:issue:`404`, :pull:`405`). +* Fixed template 1 bugs due to changes in dependencies. ( :pull:`405`). + +Internal changes +^^^^^^^^^^^^^^^^ +* `cartopy` has been pinned above version '0.23.0' in order to address a licensing issue. (:pull:`403`). +* The cookiecutter template has been updated to the latest commit via `cruft`. (:pull:`407`). + * GitHub Workflows now point to commits rather than tags. + * `Dependabot` will now only update on a monthly schedule. + * Dependencies have been updated and synchronized. + * ``CHANGES.rst`` is now ``CHANGELOG.rst`` (see: ` KeepAChangelog `_). + * The ``CODE_OF_CONDUCT.rst`` file adapted to `Contributor Covenant v2.1 `_. + * Maintainer-specific directions are now found under ``releasing.rst`` + v0.9.0 (2024-05-07) ------------------- Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Gabriel Rondeau-Genesse (:user:`RondeauG`), Juliette Lavoie (:user:`juliettelavoie`), Marco Braun (:user:`vindelico`). diff --git a/CI/requirements_ci.in b/CI/requirements_ci.in new file mode 100644 index 00000000..a0264947 --- /dev/null +++ b/CI/requirements_ci.in @@ -0,0 +1,8 @@ +build==1.2.2 +bump-my-version==0.26.1 +coveralls==4.0.1 +pip==24.2.0 +setuptools==75.1.0 +setuptools-scm==8.1.0 +tox==4.21.0 +tox-gh==1.4.1 diff --git a/CI/requirements_ci.txt b/CI/requirements_ci.txt new file mode 100644 index 00000000..1e86d551 --- /dev/null +++ b/CI/requirements_ci.txt @@ -0,0 +1,439 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --allow-unsafe --generate-hashes --output-file=CI/requirements_ci.txt CI/requirements_ci.in +# +annotated-types==0.7.0 \ + --hash=sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53 \ + --hash=sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89 + # via pydantic +bracex==2.4 \ + --hash=sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb \ + --hash=sha256:efdc71eff95eaff5e0f8cfebe7d01adf2c8637c8c92edaf63ef348c241a82418 + # via wcmatch +build==1.2.2 \ + --hash=sha256:119b2fb462adef986483438377a13b2f42064a2a3a4161f24a0cca698a07ac8c \ + --hash=sha256:277ccc71619d98afdd841a0e96ac9fe1593b823af481d3b0cea748e8894e0613 + # via -r CI/requirements_ci.in +bump-my-version==0.26.1 \ + --hash=sha256:af1cada726cf6f9a723d18941c68c325d5196453a180b3a42f8e0b38567d734d \ + --hash=sha256:be09c48111eeba56f8c870b69718013f52b5b6c6e65bc3bda5bc928181901c48 + # via -r CI/requirements_ci.in +cachetools==5.5.0 \ + --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ + --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a + # via tox +certifi==2024.7.4 \ + --hash=sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b \ + --hash=sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90 + # via requests +chardet==5.2.0 \ + --hash=sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7 \ + --hash=sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970 + # via tox +charset-normalizer==3.3.2 \ + --hash=sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027 \ + --hash=sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087 \ + --hash=sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786 \ + 
--hash=sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8 \ + --hash=sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09 \ + --hash=sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185 \ + --hash=sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574 \ + --hash=sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e \ + --hash=sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519 \ + --hash=sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898 \ + --hash=sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269 \ + --hash=sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3 \ + --hash=sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f \ + --hash=sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6 \ + --hash=sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8 \ + --hash=sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a \ + --hash=sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73 \ + --hash=sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc \ + --hash=sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714 \ + --hash=sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2 \ + --hash=sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc \ + --hash=sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce \ + --hash=sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d \ + --hash=sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e \ + --hash=sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6 \ + --hash=sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269 \ + --hash=sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96 \ + --hash=sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d \ + --hash=sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a \ + --hash=sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4 \ + --hash=sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77 \ + --hash=sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d \ + --hash=sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0 \ + --hash=sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed \ + --hash=sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068 \ + --hash=sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac \ + --hash=sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25 \ + --hash=sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8 \ + --hash=sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab \ + --hash=sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26 \ + --hash=sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2 \ + --hash=sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db \ + --hash=sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f \ + --hash=sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5 \ + --hash=sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99 \ + 
--hash=sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c \ + --hash=sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d \ + --hash=sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811 \ + --hash=sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa \ + --hash=sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a \ + --hash=sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03 \ + --hash=sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b \ + --hash=sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04 \ + --hash=sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c \ + --hash=sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001 \ + --hash=sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458 \ + --hash=sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389 \ + --hash=sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99 \ + --hash=sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985 \ + --hash=sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537 \ + --hash=sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238 \ + --hash=sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f \ + --hash=sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d \ + --hash=sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796 \ + --hash=sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a \ + --hash=sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143 \ + --hash=sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8 \ + --hash=sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c \ + --hash=sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5 \ + --hash=sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5 \ + --hash=sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711 \ + --hash=sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4 \ + --hash=sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6 \ + --hash=sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c \ + --hash=sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7 \ + --hash=sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4 \ + --hash=sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b \ + --hash=sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae \ + --hash=sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12 \ + --hash=sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c \ + --hash=sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae \ + --hash=sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8 \ + --hash=sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887 \ + --hash=sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b \ + --hash=sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4 \ + --hash=sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f \ + --hash=sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5 \ + 
--hash=sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33 \ + --hash=sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519 \ + --hash=sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561 + # via requests +click==8.1.7 \ + --hash=sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28 \ + --hash=sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de + # via + # bump-my-version + # rich-click +colorama==0.4.6 \ + --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ + --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 + # via tox +coverage[toml]==7.6.0 \ + --hash=sha256:0086cd4fc71b7d485ac93ca4239c8f75732c2ae3ba83f6be1c9be59d9e2c6382 \ + --hash=sha256:01c322ef2bbe15057bc4bf132b525b7e3f7206f071799eb8aa6ad1940bcf5fb1 \ + --hash=sha256:03cafe82c1b32b770a29fd6de923625ccac3185a54a5e66606da26d105f37dac \ + --hash=sha256:044a0985a4f25b335882b0966625270a8d9db3d3409ddc49a4eb00b0ef5e8cee \ + --hash=sha256:07ed352205574aad067482e53dd606926afebcb5590653121063fbf4e2175166 \ + --hash=sha256:0d1b923fc4a40c5832be4f35a5dab0e5ff89cddf83bb4174499e02ea089daf57 \ + --hash=sha256:0e7b27d04131c46e6894f23a4ae186a6a2207209a05df5b6ad4caee6d54a222c \ + --hash=sha256:1fad32ee9b27350687035cb5fdf9145bc9cf0a094a9577d43e909948ebcfa27b \ + --hash=sha256:289cc803fa1dc901f84701ac10c9ee873619320f2f9aff38794db4a4a0268d51 \ + --hash=sha256:3c59105f8d58ce500f348c5b56163a4113a440dad6daa2294b5052a10db866da \ + --hash=sha256:46c3d091059ad0b9c59d1034de74a7f36dcfa7f6d3bde782c49deb42438f2450 \ + --hash=sha256:482855914928c8175735a2a59c8dc5806cf7d8f032e4820d52e845d1f731dca2 \ + --hash=sha256:49c76cdfa13015c4560702574bad67f0e15ca5a2872c6a125f6327ead2b731dd \ + --hash=sha256:4b03741e70fb811d1a9a1d75355cf391f274ed85847f4b78e35459899f57af4d \ + --hash=sha256:4bea27c4269234e06f621f3fac3925f56ff34bc14521484b8f66a580aacc2e7d \ + --hash=sha256:4d5fae0a22dc86259dee66f2cc6c1d3e490c4a1214d7daa2a93d07491c5c04b6 \ + --hash=sha256:543ef9179bc55edfd895154a51792b01c017c87af0ebaae092720152e19e42ca \ + --hash=sha256:54dece71673b3187c86226c3ca793c5f891f9fc3d8aa183f2e3653da18566169 \ + --hash=sha256:6379688fb4cfa921ae349c76eb1a9ab26b65f32b03d46bb0eed841fd4cb6afb1 \ + --hash=sha256:65fa405b837060db569a61ec368b74688f429b32fa47a8929a7a2f9b47183713 \ + --hash=sha256:6616d1c9bf1e3faea78711ee42a8b972367d82ceae233ec0ac61cc7fec09fa6b \ + --hash=sha256:6fe885135c8a479d3e37a7aae61cbd3a0fb2deccb4dda3c25f92a49189f766d6 \ + --hash=sha256:7221f9ac9dad9492cecab6f676b3eaf9185141539d5c9689d13fd6b0d7de840c \ + --hash=sha256:76d5f82213aa78098b9b964ea89de4617e70e0d43e97900c2778a50856dac605 \ + --hash=sha256:7792f0ab20df8071d669d929c75c97fecfa6bcab82c10ee4adb91c7a54055463 \ + --hash=sha256:831b476d79408ab6ccfadaaf199906c833f02fdb32c9ab907b1d4aa0713cfa3b \ + --hash=sha256:9146579352d7b5f6412735d0f203bbd8d00113a680b66565e205bc605ef81bc6 \ + --hash=sha256:9cc44bf0315268e253bf563f3560e6c004efe38f76db03a1558274a6e04bf5d5 \ + --hash=sha256:a73d18625f6a8a1cbb11eadc1d03929f9510f4131879288e3f7922097a429f63 \ + --hash=sha256:a8659fd33ee9e6ca03950cfdcdf271d645cf681609153f218826dd9805ab585c \ + --hash=sha256:a94925102c89247530ae1dab7dc02c690942566f22e189cbd53579b0693c0783 \ + --hash=sha256:ad4567d6c334c46046d1c4c20024de2a1c3abc626817ae21ae3da600f5779b44 \ + --hash=sha256:b2e16f4cd2bc4d88ba30ca2d3bbf2f21f00f382cf4e1ce3b1ddc96c634bc48ca \ + --hash=sha256:bbdf9a72403110a3bdae77948b8011f644571311c2fb35ee15f0f10a8fc082e8 \ + 
--hash=sha256:beb08e8508e53a568811016e59f3234d29c2583f6b6e28572f0954a6b4f7e03d \ + --hash=sha256:c4cbe651f3904e28f3a55d6f371203049034b4ddbce65a54527a3f189ca3b390 \ + --hash=sha256:c7b525ab52ce18c57ae232ba6f7010297a87ced82a2383b1afd238849c1ff933 \ + --hash=sha256:ca5d79cfdae420a1d52bf177de4bc2289c321d6c961ae321503b2ca59c17ae67 \ + --hash=sha256:cdab02a0a941af190df8782aafc591ef3ad08824f97850b015c8c6a8b3877b0b \ + --hash=sha256:d17c6a415d68cfe1091d3296ba5749d3d8696e42c37fca5d4860c5bf7b729f03 \ + --hash=sha256:d39bd10f0ae453554798b125d2f39884290c480f56e8a02ba7a6ed552005243b \ + --hash=sha256:d4b3cd1ca7cd73d229487fa5caca9e4bc1f0bca96526b922d61053ea751fe791 \ + --hash=sha256:d50a252b23b9b4dfeefc1f663c568a221092cbaded20a05a11665d0dbec9b8fb \ + --hash=sha256:da8549d17489cd52f85a9829d0e1d91059359b3c54a26f28bec2c5d369524807 \ + --hash=sha256:dcd070b5b585b50e6617e8972f3fbbee786afca71b1936ac06257f7e178f00f6 \ + --hash=sha256:ddaaa91bfc4477d2871442bbf30a125e8fe6b05da8a0015507bfbf4718228ab2 \ + --hash=sha256:df423f351b162a702c053d5dddc0fc0ef9a9e27ea3f449781ace5f906b664428 \ + --hash=sha256:dff044f661f59dace805eedb4a7404c573b6ff0cdba4a524141bc63d7be5c7fd \ + --hash=sha256:e7e128f85c0b419907d1f38e616c4f1e9f1d1b37a7949f44df9a73d5da5cd53c \ + --hash=sha256:ed8d1d1821ba5fc88d4a4f45387b65de52382fa3ef1f0115a4f7a20cdfab0e94 \ + --hash=sha256:f2501d60d7497fd55e391f423f965bbe9e650e9ffc3c627d5f0ac516026000b8 \ + --hash=sha256:f7db0b6ae1f96ae41afe626095149ecd1b212b424626175a6633c2999eaad45b + # via coveralls +coveralls==4.0.1 \ + --hash=sha256:7a6b1fa9848332c7b2221afb20f3df90272ac0167060f41b5fe90429b30b1809 \ + --hash=sha256:7b2a0a2bcef94f295e3cf28dcc55ca40b71c77d1c2446b538e85f0f7bc21aa69 + # via -r CI/requirements_ci.in +distlib==0.3.8 \ + --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ + --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 + # via virtualenv +docopt==0.6.2 \ + --hash=sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491 + # via coveralls +filelock==3.16.1 \ + --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ + --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 + # via + # tox + # virtualenv +idna==3.7 \ + --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ + --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 + # via requests +markdown-it-py==3.0.0 \ + --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ + --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb + # via rich +mdurl==0.1.2 \ + --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ + --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba + # via markdown-it-py +packaging==24.1 \ + --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ + --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 + # via + # build + # pyproject-api + # setuptools-scm + # tox +platformdirs==4.3.6 \ + --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ + --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb + # via + # tox + # virtualenv +pluggy==1.5.0 \ + --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ + --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 + # via tox +prompt-toolkit==3.0.36 \ + 
--hash=sha256:3e163f254bef5a03b146397d7c1963bd3e2812f0964bb9a24e6ec761fd28db63 \ + --hash=sha256:aa64ad242a462c5ff0363a7b9cfe696c20d55d9fc60c11fd8e632d064804d305 + # via questionary +pydantic==2.8.2 \ + --hash=sha256:6f62c13d067b0755ad1c21a34bdd06c0c12625a22b0fc09c6b149816604f7c2a \ + --hash=sha256:73ee9fddd406dc318b885c7a2eab8a6472b68b8fb5ba8150949fc3db939f23c8 + # via + # bump-my-version + # pydantic-settings +pydantic-core==2.20.1 \ + --hash=sha256:035ede2e16da7281041f0e626459bcae33ed998cca6a0a007a5ebb73414ac72d \ + --hash=sha256:04024d270cf63f586ad41fff13fde4311c4fc13ea74676962c876d9577bcc78f \ + --hash=sha256:0827505a5c87e8aa285dc31e9ec7f4a17c81a813d45f70b1d9164e03a813a686 \ + --hash=sha256:084659fac3c83fd674596612aeff6041a18402f1e1bc19ca39e417d554468482 \ + --hash=sha256:10d4204d8ca33146e761c79f83cc861df20e7ae9f6487ca290a97702daf56006 \ + --hash=sha256:11b71d67b4725e7e2a9f6e9c0ac1239bbc0c48cce3dc59f98635efc57d6dac83 \ + --hash=sha256:150906b40ff188a3260cbee25380e7494ee85048584998c1e66df0c7a11c17a6 \ + --hash=sha256:175873691124f3d0da55aeea1d90660a6ea7a3cfea137c38afa0a5ffabe37b88 \ + --hash=sha256:177f55a886d74f1808763976ac4efd29b7ed15c69f4d838bbd74d9d09cf6fa86 \ + --hash=sha256:19c0fa39fa154e7e0b7f82f88ef85faa2a4c23cc65aae2f5aea625e3c13c735a \ + --hash=sha256:1eedfeb6089ed3fad42e81a67755846ad4dcc14d73698c120a82e4ccf0f1f9f6 \ + --hash=sha256:225b67a1f6d602de0ce7f6c1c3ae89a4aa25d3de9be857999e9124f15dab486a \ + --hash=sha256:242b8feb3c493ab78be289c034a1f659e8826e2233786e36f2893a950a719bb6 \ + --hash=sha256:254ec27fdb5b1ee60684f91683be95e5133c994cc54e86a0b0963afa25c8f8a6 \ + --hash=sha256:25e9185e2d06c16ee438ed39bf62935ec436474a6ac4f9358524220f1b236e43 \ + --hash=sha256:26ab812fa0c845df815e506be30337e2df27e88399b985d0bb4e3ecfe72df31c \ + --hash=sha256:26ca695eeee5f9f1aeeb211ffc12f10bcb6f71e2989988fda61dabd65db878d4 \ + --hash=sha256:26dc97754b57d2fd00ac2b24dfa341abffc380b823211994c4efac7f13b9e90e \ + --hash=sha256:270755f15174fb983890c49881e93f8f1b80f0b5e3a3cc1394a255706cabd203 \ + --hash=sha256:2aafc5a503855ea5885559eae883978c9b6d8c8993d67766ee73d82e841300dd \ + --hash=sha256:2d036c7187b9422ae5b262badb87a20a49eb6c5238b2004e96d4da1231badef1 \ + --hash=sha256:33499e85e739a4b60c9dac710c20a08dc73cb3240c9a0e22325e671b27b70d24 \ + --hash=sha256:37eee5b638f0e0dcd18d21f59b679686bbd18917b87db0193ae36f9c23c355fc \ + --hash=sha256:38cf1c40a921d05c5edc61a785c0ddb4bed67827069f535d794ce6bcded919fc \ + --hash=sha256:3acae97ffd19bf091c72df4d726d552c473f3576409b2a7ca36b2f535ffff4a3 \ + --hash=sha256:3c5ebac750d9d5f2706654c638c041635c385596caf68f81342011ddfa1e5598 \ + --hash=sha256:3d482efec8b7dc6bfaedc0f166b2ce349df0011f5d2f1f25537ced4cfc34fd98 \ + --hash=sha256:407653af5617f0757261ae249d3fba09504d7a71ab36ac057c938572d1bc9331 \ + --hash=sha256:40a783fb7ee353c50bd3853e626f15677ea527ae556429453685ae32280c19c2 \ + --hash=sha256:41e81317dd6a0127cabce83c0c9c3fbecceae981c8391e6f1dec88a77c8a569a \ + --hash=sha256:41f4c96227a67a013e7de5ff8f20fb496ce573893b7f4f2707d065907bffdbd6 \ + --hash=sha256:469f29f9093c9d834432034d33f5fe45699e664f12a13bf38c04967ce233d688 \ + --hash=sha256:4745f4ac52cc6686390c40eaa01d48b18997cb130833154801a442323cc78f91 \ + --hash=sha256:4868f6bd7c9d98904b748a2653031fc9c2f85b6237009d475b1008bfaeb0a5aa \ + --hash=sha256:4aa223cd1e36b642092c326d694d8bf59b71ddddc94cdb752bbbb1c5c91d833b \ + --hash=sha256:4dd484681c15e6b9a977c785a345d3e378d72678fd5f1f3c0509608da24f2ac0 \ + --hash=sha256:4f2790949cf385d985a31984907fecb3896999329103df4e4983a4a41e13e840 \ + 
--hash=sha256:512ecfbefef6dac7bc5eaaf46177b2de58cdf7acac8793fe033b24ece0b9566c \ + --hash=sha256:516d9227919612425c8ef1c9b869bbbee249bc91912c8aaffb66116c0b447ebd \ + --hash=sha256:53e431da3fc53360db73eedf6f7124d1076e1b4ee4276b36fb25514544ceb4a3 \ + --hash=sha256:595ba5be69b35777474fa07f80fc260ea71255656191adb22a8c53aba4479231 \ + --hash=sha256:5b5ff4911aea936a47d9376fd3ab17e970cc543d1b68921886e7f64bd28308d1 \ + --hash=sha256:5d41e6daee2813ecceea8eda38062d69e280b39df793f5a942fa515b8ed67953 \ + --hash=sha256:5e999ba8dd90e93d57410c5e67ebb67ffcaadcea0ad973240fdfd3a135506250 \ + --hash=sha256:5f239eb799a2081495ea659d8d4a43a8f42cd1fe9ff2e7e436295c38a10c286a \ + --hash=sha256:635fee4e041ab9c479e31edda27fcf966ea9614fff1317e280d99eb3e5ab6fe2 \ + --hash=sha256:65db0f2eefcaad1a3950f498aabb4875c8890438bc80b19362cf633b87a8ab20 \ + --hash=sha256:6b507132dcfc0dea440cce23ee2182c0ce7aba7054576efc65634f080dbe9434 \ + --hash=sha256:6b9d9bb600328a1ce523ab4f454859e9d439150abb0906c5a1983c146580ebab \ + --hash=sha256:70c8daf4faca8da5a6d655f9af86faf6ec2e1768f4b8b9d0226c02f3d6209703 \ + --hash=sha256:77bf3ac639c1ff567ae3b47f8d4cc3dc20f9966a2a6dd2311dcc055d3d04fb8a \ + --hash=sha256:784c1214cb6dd1e3b15dd8b91b9a53852aed16671cc3fbe4786f4f1db07089e2 \ + --hash=sha256:7eb6a0587eded33aeefea9f916899d42b1799b7b14b8f8ff2753c0ac1741edac \ + --hash=sha256:7ed1b0132f24beeec5a78b67d9388656d03e6a7c837394f99257e2d55b461611 \ + --hash=sha256:8ad4aeb3e9a97286573c03df758fc7627aecdd02f1da04516a86dc159bf70121 \ + --hash=sha256:964faa8a861d2664f0c7ab0c181af0bea66098b1919439815ca8803ef136fc4e \ + --hash=sha256:9dc1b507c12eb0481d071f3c1808f0529ad41dc415d0ca11f7ebfc666e66a18b \ + --hash=sha256:9ebfef07dbe1d93efb94b4700f2d278494e9162565a54f124c404a5656d7ff09 \ + --hash=sha256:a45f84b09ac9c3d35dfcf6a27fd0634d30d183205230a0ebe8373a0e8cfa0906 \ + --hash=sha256:a4f55095ad087474999ee28d3398bae183a66be4823f753cd7d67dd0153427c9 \ + --hash=sha256:a6d511cc297ff0883bc3708b465ff82d7560193169a8b93260f74ecb0a5e08a7 \ + --hash=sha256:a8ad4c766d3f33ba8fd692f9aa297c9058970530a32c728a2c4bfd2616d3358b \ + --hash=sha256:aa2f457b4af386254372dfa78a2eda2563680d982422641a85f271c859df1987 \ + --hash=sha256:b03f7941783b4c4a26051846dea594628b38f6940a2fdc0df00b221aed39314c \ + --hash=sha256:b0dae11d8f5ded51699c74d9548dcc5938e0804cc8298ec0aa0da95c21fff57b \ + --hash=sha256:b91ced227c41aa29c672814f50dbb05ec93536abf8f43cd14ec9521ea09afe4e \ + --hash=sha256:bc633a9fe1eb87e250b5c57d389cf28998e4292336926b0b6cdaee353f89a237 \ + --hash=sha256:bebb4d6715c814597f85297c332297c6ce81e29436125ca59d1159b07f423eb1 \ + --hash=sha256:c336a6d235522a62fef872c6295a42ecb0c4e1d0f1a3e500fe949415761b8a19 \ + --hash=sha256:c6514f963b023aeee506678a1cf821fe31159b925c4b76fe2afa94cc70b3222b \ + --hash=sha256:c693e916709c2465b02ca0ad7b387c4f8423d1db7b4649c551f27a529181c5ad \ + --hash=sha256:c81131869240e3e568916ef4c307f8b99583efaa60a8112ef27a366eefba8ef0 \ + --hash=sha256:d02a72df14dfdbaf228424573a07af10637bd490f0901cee872c4f434a735b94 \ + --hash=sha256:d2a8fa9d6d6f891f3deec72f5cc668e6f66b188ab14bb1ab52422fe8e644f312 \ + --hash=sha256:d2b27e6af28f07e2f195552b37d7d66b150adbaa39a6d327766ffd695799780f \ + --hash=sha256:d2fe69c5434391727efa54b47a1e7986bb0186e72a41b203df8f5b0a19a4f669 \ + --hash=sha256:d3f3ed29cd9f978c604708511a1f9c2fdcb6c38b9aae36a51905b8811ee5cbf1 \ + --hash=sha256:d573faf8eb7e6b1cbbcb4f5b247c60ca8be39fe2c674495df0eb4318303137fe \ + --hash=sha256:e0bbdd76ce9aa5d4209d65f2b27fc6e5ef1312ae6c5333c26db3f5ade53a1e99 \ + 
--hash=sha256:e7c4ea22b6739b162c9ecaaa41d718dfad48a244909fe7ef4b54c0b530effc5a \ + --hash=sha256:e93e1a4b4b33daed65d781a57a522ff153dcf748dee70b40c7258c5861e1768a \ + --hash=sha256:e97fdf088d4b31ff4ba35db26d9cc472ac7ef4a2ff2badeabf8d727b3377fc52 \ + --hash=sha256:e9fa4c9bf273ca41f940bceb86922a7667cd5bf90e95dbb157cbb8441008482c \ + --hash=sha256:eaad4ff2de1c3823fddf82f41121bdf453d922e9a238642b1dedb33c4e4f98ad \ + --hash=sha256:f1f62b2413c3a0e846c3b838b2ecd6c7a19ec6793b2a522745b0869e37ab5bc1 \ + --hash=sha256:f6d6cff3538391e8486a431569b77921adfcdef14eb18fbf19b7c0a5294d4e6a \ + --hash=sha256:f9aa05d09ecf4c75157197f27cdc9cfaeb7c5f15021c6373932bf3e124af029f \ + --hash=sha256:fa2fddcb7107e0d1808086ca306dcade7df60a13a6c347a7acf1ec139aa6789a \ + --hash=sha256:faa6b09ee09433b87992fb5a2859efd1c264ddc37280d2dd5db502126d0e7f27 + # via pydantic +pydantic-settings==2.3.4 \ + --hash=sha256:11ad8bacb68a045f00e4f862c7a718c8a9ec766aa8fd4c32e39a0594b207b53a \ + --hash=sha256:c5802e3d62b78e82522319bbc9b8f8ffb28ad1c988a99311d04f2a6051fca0a7 + # via bump-my-version +pygments==2.18.0 \ + --hash=sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199 \ + --hash=sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a + # via rich +pyproject-api==1.8.0 \ + --hash=sha256:3d7d347a047afe796fd5d1885b1e391ba29be7169bd2f102fcd378f04273d228 \ + --hash=sha256:77b8049f2feb5d33eefcc21b57f1e279636277a8ac8ad6b5871037b243778496 + # via tox +pyproject-hooks==1.2.0 \ + --hash=sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8 \ + --hash=sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913 + # via build +python-dotenv==1.0.1 \ + --hash=sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca \ + --hash=sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a + # via pydantic-settings +questionary==2.0.1 \ + --hash=sha256:8ab9a01d0b91b68444dff7f6652c1e754105533f083cbe27597c8110ecc230a2 \ + --hash=sha256:bcce898bf3dbb446ff62830c86c5c6fb9a22a54146f0f5597d3da43b10d8fc8b + # via bump-my-version +requests==2.32.3 \ + --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ + --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 + # via coveralls +rich==13.7.1 \ + --hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \ + --hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432 + # via + # bump-my-version + # rich-click +rich-click==1.8.3 \ + --hash=sha256:636d9c040d31c5eee242201b5bf4f2d358bfae4db14bb22ec1cafa717cfd02cd \ + --hash=sha256:6d75bdfa7aa9ed2c467789a0688bc6da23fbe3a143e19aa6ad3f8bac113d2ab3 + # via bump-my-version +setuptools-scm==8.1.0 \ + --hash=sha256:42dea1b65771cba93b7a515d65a65d8246e560768a66b9106a592c8e7f26c8a7 \ + --hash=sha256:897a3226a6fd4a6eb2f068745e49733261a21f70b1bb28fce0339feb978d9af3 + # via -r CI/requirements_ci.in +tomli==2.0.1 \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f + # via + # build + # coverage + # pyproject-api + # setuptools-scm + # tox +tomlkit==0.13.0 \ + --hash=sha256:08ad192699734149f5b97b45f1f18dad7eb1b6d16bc72ad0c2335772650d7b72 \ + --hash=sha256:7075d3042d03b80f603482d69bf0c8f345c2b30e41699fd8883227f89972b264 + # via bump-my-version +tox==4.21.0 \ + --hash=sha256:693ac51378255d34ad7aab6dd2ce9ab6a1cf1924eb930183fde850ad503b681d \ + 
--hash=sha256:e64dd9847ff3a7ec90368be412d7efe61a39caf043222ffbe9ad638ea435f6f6 + # via + # -r CI/requirements_ci.in + # tox-gh +tox-gh==1.4.1 \ + --hash=sha256:005b33d16eef1bd1dae9f7d8b3cef53374af7d475f9c9c33ef098247741fb694 \ + --hash=sha256:da422beccbdc5ad5994fe8faf6c193f2d794e957628b052ba23e7fcf9e2e340f + # via -r CI/requirements_ci.in +typing-extensions==4.12.2 \ + --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ + --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 + # via + # pydantic + # pydantic-core + # rich-click + # tox +urllib3==2.2.2 \ + --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \ + --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168 + # via requests +virtualenv==20.26.6 \ + --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ + --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 + # via tox +wcmatch==8.5.2 \ + --hash=sha256:17d3ad3758f9d0b5b4dedc770b65420d4dac62e680229c287bf24c9db856a478 \ + --hash=sha256:a70222b86dea82fb382dd87b73278c10756c138bd6f8f714e2183128887b9eb2 + # via bump-my-version +wcwidth==0.2.13 \ + --hash=sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859 \ + --hash=sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5 + # via prompt-toolkit + +# The following packages are considered to be unsafe in a requirements file: +pip==24.2 \ + --hash=sha256:2cd581cf58ab7fcfca4ce8efa6dcacd0de5bf8d0a3eb9ec927e07405f4d9e2a2 \ + --hash=sha256:5b5e490b5e9cb275c879595064adce9ebd31b854e3e803740b72f9ccf34a45b8 + # via -r CI/requirements_ci.in +setuptools==75.1.0 \ + --hash=sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2 \ + --hash=sha256:d59a21b17a275fb872a9c3dae73963160ae079f1049ed956880cd7c09b120538 + # via + # -r CI/requirements_ci.in + # setuptools-scm diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 778e6dde..a0190a43 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,3 +1,4 @@ + # Contributor Covenant Code of Conduct ## Our Pledge @@ -6,8 +7,8 @@ We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, -nationality, personal appearance, race, religion, or sexual identity -and orientation. +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
@@ -22,17 +23,17 @@ community include: * Giving and gracefully accepting constructive feedback * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience -* Focusing on what is best not just for us as individuals, but for the - overall community +* Focusing on what is best not just for us as individuals, but for the overall + community Examples of unacceptable behavior include: -* The use of sexualized language or imagery, and sexual attention or - advances of any kind +* The use of sexualized language or imagery, and sexual attention or advances of + any kind * Trolling, insulting or derogatory comments, and personal or political attacks * Public or private harassment -* Publishing others' private information, such as a physical or email - address, without their explicit permission +* Publishing others' private information, such as a physical or email address, + without their explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting @@ -52,7 +53,7 @@ decisions when appropriate. This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. -Examples of representing our community include using an official e-mail address, +Examples of representing our community include using an official email address, posting via an official social media account, or acting as an appointed representative at an online or offline event. @@ -60,7 +61,7 @@ representative at an online or offline event. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at -support@ouranos.ca. +support@ouranos.ca All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the @@ -82,15 +83,15 @@ behavior was inappropriate. A public apology may be requested. ### 2. Warning -**Community Impact**: A violation through a single incident or series -of actions. +**Community Impact**: A violation through a single incident or series of +actions. **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels -like social media. Violating these terms may lead to a temporary or -permanent ban. +like social media. Violating these terms may lead to a temporary or permanent +ban. ### 3. Temporary Ban @@ -106,23 +107,27 @@ Violating these terms may lead to a permanent ban. ### 4. Permanent Ban **Community Impact**: Demonstrating a pattern of violation of community -standards, including sustained inappropriate behavior, harassment of an +standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. -**Consequence**: A permanent ban from any sort of public interaction within -the community. +**Consequence**: A permanent ban from any sort of public interaction within the +community. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], -version 2.0, available at -https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 
- -Community Impact Guidelines were inspired by [Mozilla's code of conduct -enforcement ladder](https://github.com/mozilla/diversity). +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. -[homepage]: https://www.contributor-covenant.org +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. For answers to common questions about this code of conduct, see the FAQ at -https://www.contributor-covenant.org/faq. Translations are available at -https://www.contributor-covenant.org/translations. +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index d10ab214..35f6a7de 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -1,5 +1,3 @@ -.. highlight:: shell - ============ Contributing ============ @@ -46,22 +44,23 @@ If you are proposing a feature: * Explain in detail how it would work. * Keep the scope as narrow as possible, to make it easier to implement. -* Remember that this is a volunteer-driven project, and that contributions - are welcome. :) +* Remember that this is a volunteer-driven project, and that contributions are welcome. :) Get Started! ------------ .. note:: - If you are new to using GitHub and `git`, please read `this guide `_ first. + If you are new to using `GitHub `_ and ``git``, please read `this guide `_ first. .. warning:: - Anaconda Python users: Due to the complexity of some packages, the default dependency solver can take a long time to resolve the environment. Consider running the following commands in order to speed up the process:: + Anaconda Python users: Due to the complexity of some packages, the default dependency solver can take a long time to resolve the environment. Consider running the following commands in order to speed up the process: + + .. code-block:: console - $ conda install -n base conda-libmamba-solver - $ conda config --set solver libmamba + conda install -n base conda-libmamba-solver + conda config --set solver libmamba For more information, please see the following link: https://www.anaconda.com/blog/a-faster-conda-for-a-growing-community @@ -69,76 +68,128 @@ Get Started! Ready to contribute? Here's how to set up ``xscen`` for local development. -#. Clone the repo locally:: +#. First, clone the ``xscen`` repo locally. - $ git clone git@github.com:Ouranosinc/xscen.git + * If you are not a ``xscen`` collaborator, first fork the ``xscen`` repo on GitHub, then clone your fork locally. -#. Install your local copy into a development environment. You can create a new Anaconda development environment with:: + .. code-block:: console - $ conda env create -f environment-dev.yml - $ conda activate xscen-dev - $ python -m pip install --editable ".[dev]" + git clone git@github.com:your_name_here/xscen.git - This installs ``xscen`` in an "editable" state, meaning that changes to the code are immediately seen by the environment. + * If you are a ``xscen`` collaborator, clone the ``xscen`` repo directly. -#. As xscen was installed in editable mode, we also need to compile the translation catalogs manually:: + .. 
code-block:: console - $ make translate + git clone git@github.com:Ouranosinc/xscen.git + +#. Install your local copy into a development environment. You can create a new Anaconda development environment with: + + .. code-block:: console + + conda env create -f environment-dev.yml + conda activate xscen-dev + make dev + + If you are on Windows, replace the ``make dev`` command with the following: + + .. code-block:: console + + python -m pip install -e .[dev] + pre-commit install -#. To ensure a consistent coding style, install the ``pre-commit`` hooks to your local clone:: + This installs ``xscen`` in an "editable" state, meaning that changes to the code are immediately seen by the environment. To ensure a consistent coding style, `make dev` also installs the ``pre-commit`` hooks to your local clone. - $ pre-commit install + On commit, ``pre-commit`` will check that ``black``, ``blackdoc``, ``isort``, ``flake8``, and ``ruff`` checks are passing, perform automatic fixes if possible, and warn of violations that require intervention. If your commit fails the checks initially, simply fix the errors, re-add the files, and re-commit. - On commit, ``pre-commit`` will check that ``black``, ``blackdoc``, ``isort``, ``flake8``, and ``ruff`` checks are passing, perform automatic fixes if possible, and warn of violations that require intervention. If your commit fails the checks initially, simply fix the errors, re-add the files, and re-commit. + You can also run the hooks manually with: - You can also run the hooks manually with:: + .. code-block:: console - $ pre-commit run -a + pre-commit run -a - If you want to skip the ``pre-commit`` hooks temporarily, you can pass the ``--no-verify`` flag to `$ git commit`. + If you want to skip the ``pre-commit`` hooks temporarily, you can pass the `--no-verify` flag to `$ git commit`. -#. Create a branch for local development:: +#. As xscen was installed in editable mode, we also need to compile the translation catalogs manually: - $ git checkout -b name-of-your-bugfix-or-feature + .. code-block:: console - Now you can make your changes locally. + make translate -#. When you're done making changes, we **strongly** suggest running the tests in your environment or with the help of ``tox``:: +#. Create a branch for local development: - $ python -m pytest - # Or, to run multiple build tests - $ tox + .. code-block:: console - Alternatively, you can run the tests using `make`:: + git checkout -b name-of-your-bugfix-or-feature - $ make lint - $ make test + Now you can make your changes locally. - Running `make lint` and `make test` demands that your runtime/dev environment have all necessary development dependencies installed. +#. When you're done making changes, we **strongly** suggest running the tests in your environment or with the help of ``tox``: + + .. code-block:: console + + make lint + python -m pytest + # Or, to run multiple build tests + python -m tox + + Alternatively, you can run the tests using `make`: + + .. code-block:: console + + make lint + make test + + Running `make lint` and `make test` demands that your runtime/dev environment have all necessary development dependencies installed. .. warning:: - Due to some dependencies only being available via Anaconda/conda-forge or built from source, `tox`-based testing will only work if `ESMF`_ is available in your system path. 
This also requires that the `ESMF_VERSION` environment variable (matching the version of ESMF installed) be accessible within your shell as well (e.g.: `$ export ESMF_VERSION=8.5.0`). + Due to some dependencies only being available via Anaconda/conda-forge or built from source, `tox`-based testing will only work if `ESMF `_ is available in your system path. This also requires that the `ESMF_VERSION` environment variable (matching the version of ESMF installed) be accessible within your shell as well (e.g.: `$ export ESMF_VERSION=8.5.0`). + +#. Commit your changes and push your branch to GitHub: + + .. code-block:: console -#. Commit your changes and push your branch to GitHub:: + git add . + git commit -m "Your detailed description of your changes." + git push origin name-of-your-bugfix-or-feature - $ git add . - $ git commit -m "Your detailed description of your changes." - $ git push origin name-of-your-bugfix-or-feature + If ``pre-commit`` hooks fail, try fixing the issues, re-staging the files to be committed, and re-committing your changes (or, if need be, you can skip them with `git commit --no-verify`). - If ``pre-commit`` hooks fail, try re-committing your changes (or, if need be, you can skip them with `$ git commit --no-verify`). #. Submit a `Pull Request `_ through the GitHub website. -#. When pushing your changes to your branch on GitHub, the documentation will automatically be tested to reflect the changes in your Pull Request. This build process can take several minutes at times. If you are actively making changes that affect the documentation and wish to save time, you can compile and test your changes beforehand locally with:: +#. If changes to your branch are made on GitHub, you can update your local branch with: - # To generate the html and open it in your browser - $ make docs - # To only generate the html - $ make autodoc - $ make -C docs html - # To simply test that the docs pass build checks - $ tox -e docs + .. code-block:: console + + git checkout name-of-your-bugfix-or-feature + git fetch + git pull origin name-of-your-bugfix-or-feature + + If you have merge conflicts, you might need to replace `git pull` with `git merge` and resolve the conflicts manually. + Resolving conflicts from the command line can be tricky. If you are not comfortable with this, you can ignore the last command and instead use a GUI like PyCharm or Visual Studio Code to merge the remote changes and resolve the conflicts. + +#. Before merging, your Pull Request will need to be based on the `main` branch of the `xscen`` repository. If your branch is not up-to-date with the `main` branch, you can perform similar steps as above to update your branch: + + .. code-block:: console + + git checkout name-of-your-bugfix-or-feature + git fetch + git pull origin main + + See the previous step for more information on resolving conflicts. + +#. When pushing your changes to your branch on GitHub, the documentation will automatically be tested to reflect the changes in your Pull Request. This build process can take several minutes at times. If you are actively making changes that affect the documentation and wish to save time, you can compile and test your changes beforehand locally with: + + .. code-block:: console + + # To generate the html and open it in your browser + make docs + # To only generate the html + make autodoc + make -C docs html + # To simply test that the docs pass build checks + python -m tox -e docs .. note:: @@ -146,13 +197,13 @@ Ready to contribute? 
Here's how to set up ``xscen`` for local development. In order to speed up documentation builds, setting a value for the environment variable "SKIP_NOTEBOOKS" (e.g. "$ export SKIP_NOTEBOOKS=1") will prevent the notebooks from being evaluated on all subsequent "$ tox -e docs" or "$ make docs" invocations. -#. Once your Pull Request has been accepted and merged to the ``main`` branch, several automated workflows will be triggered: +#. Once your Pull Request has been accepted and merged to the `main` branch, several automated workflows will be triggered: - - The ``bump-version.yml`` workflow will automatically bump the patch version when pull requests are pushed to the ``main`` branch on GitHub. **It is not recommended to manually bump the version in your branch when merging (non-release) pull requests (this will cause the version to be bumped twice).** + - The ``bump-version.yml`` workflow will automatically bump the patch version when pull requests are pushed to the `main` branch on GitHub. **It is not recommended to manually bump the version in your branch when merging (non-release) pull requests (this will cause the version to be bumped twice).** - `ReadTheDocs` will automatically build the documentation and publish it to the `latest` branch of `xscen` documentation website. - If your branch is not a fork (ie: you are a maintainer), your branch will be automatically deleted. -You will have contributed your first changes to ``xscen``! +You will have contributed to ``xscen``! .. _translating-xscen: @@ -187,7 +238,7 @@ Once the code is implemented and translatable strings are marked as such, we nee $ make findfrench -Then go edit ``xscen/xscen/data/fr/LC_MESSAGES/xscen.po`` with the correct French translations. Finally, running:: +Then go edit ``xscen/xscen/data/fr/LC_MESSAGES/xscen.po`` with the correct French translations. Finally, run:: $ make translate @@ -200,28 +251,43 @@ Before you submit a pull request, check that it meets these guidelines: #. The pull request should include tests and should aim to provide `code coverage `_ for all new lines of code. You can use the ``--cov-report html --cov xscen`` flags during the call to ``pytest`` to generate an HTML report and analyse the current test coverage. -#. If the pull request adds functionality, the docs should also be updated. Put your new functionality into a function with a docstring, and add the feature to the list in ``README.rst``. +#. All functions should be documented with `docstrings` following the `numpydoc `_ format. + +#. If the pull request adds functionality, either update the documentation or create a new notebook that demonstrates the feature. Library-defining features should also be listed in ``README.rst``. #. The pull request should not break the templates. -#. The pull request should work for Python 3.9, 3.10, 3.11, and 3.12. Check that the tests pass for all supported Python versions. +#. The pull request should work for all currently supported Python versions. Check the `pyproject.toml` or `tox.ini` files for the list of supported versions. We aim to follow the support and drop schedule of Python versions as recommended by the NumPy NEP calendar: https://numpy.org/neps/nep-0029-deprecation_policy.html Tips ---- -To run a subset of tests:: +To run a subset of tests: + +.. code-block:: console + + python -m pytest tests/test_xscen.py + +You can also directly call a specific test class or test function using: + +.. 
code-block:: console + + python -m pytest tests/test_xscen.py::TestClassName::test_function_name + +For more information on running tests, see the `pytest documentation `_. -$ pytest tests.test_xscen +To run specific code style checks: -To run specific code style checks:: +.. code-block:: console - $ black --check xscen tests - $ isort --check xscen tests - $ blackdoc --check xscen docs - $ ruff xscen tests - $ flake8 xscen tests + python -m black --check src/xscen tests + python -m isort --check src/xscen tests + python -m blackdoc --check src/xscen docs + python -m ruff check src/xscen tests + python -m flake8 src/xscen tests + validate-docstrings src/xscen/**.py -To get ``black``, ``isort``, ``blackdoc``, ``ruff``, and ``flake8`` (with plugins ``flake8-alphabetize`` and ``flake8-rst-docstrings``) simply install them with `pip` (or `conda`) into your environment. +To get ``black``, ``isort``, ``blackdoc``, ``ruff``, ``flake8`` (with the ``flake8-rst-docstrings`` plugin), and ``numpydoc`` (for ``validate-docstrings``), simply install them with ``pip`` (or ``conda``) into your environment. Versioning/Tagging ------------------ @@ -275,5 +341,8 @@ From the command line on your Linux distribution, simply run the following from # To upload to PyPI $ twine upload dist/* +Code of Conduct +--------------- -.. _`ESMF`: http://earthsystemmodeling.org/download/ +Please note that this project is released with a `Contributor Code of Conduct `_. +By participating in this project you agree to abide by its terms. diff --git a/MANIFEST.in b/MANIFEST.in index 70c096ad..070cdb41 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,16 +1,17 @@ include AUTHORS.rst +include CHANGELOG.rst include CONTRIBUTING.rst -include HISTORY.rst include LICENSE include Makefile include README.rst +include pyproject.toml include .zenodo.json -recursive-include xscen *.py *.yml -recursive-include xscen/CVs *.json -recursive-include xscen/data/fr *.yml *.csv -recursive-include xscen/data *.nc -recursive-include xscen/data/fr/LC_MESSAGES *.mo *.po +recursive-include src/xscen *.py *.yml +recursive-include src/xscen/CVs *.json +recursive-include src/xscen/data/fr *.yml *.csv +recursive-include src/xscen/data *.nc +recursive-include src/xscen/data *.mo *.po recursive-include tests *.py recursive-include docs conf.py Makefile make.bat *.png *.rst *.yml recursive-include docs/locales *.mo *.po diff --git a/Makefile b/Makefile index eb45ca03..1f42c520 100644 --- a/Makefile +++ b/Makefile @@ -56,13 +56,14 @@ clean-test: ## remove test and coverage artifacts rm -fr .pytest_cache lint/flake8: ## check style with flake8 - ruff xscen tests - flake8 --config=.flake8 xscen tests + python -m ruff check src/xscen tests + python -m flake8 --config=.flake8 src/xscen tests + # python -m numpydoc lint src/xscen/**.py # FIXME: disabled until the codebase is fully numpydoc compliant lint/black: ## check style with black - black --check xscen tests - blackdoc --check xscen docs - isort --check xscen tests + python -m black --check src/xscen tests + python -m blackdoc --check src/xscen docs + python -m isort --check src/xscen tests lint: lint/black lint/flake8 ## check style @@ -70,14 +71,14 @@ test: ## run tests quickly with the default Python python -m pytest test-all: ## run tests on every Python version with tox - tox + python -m tox initialize-translations: clean-docs ## initialize translations, ignoring autodoc-generated files ${MAKE} -C docs gettext sphinx-intl update -p docs/_build/gettext -d docs/locales -l fr autodoc: clean-docs ## create 
sphinx-apidoc files - sphinx-apidoc -o docs/apidoc --module-first xscen + sphinx-apidoc -o docs/apidoc --module-first src/xscen linkcheck: autodoc ## run checks over all external links found throughout the documentation env SKIP_NOTEBOOKS=1 $(MAKE) -C docs linkcheck @@ -106,11 +107,22 @@ install: clean ## install the package to the active Python's site-packages python -m pip install . dev: clean ## install the package in editable mode with all development dependencies - python -m pip install --editable ".[dev]" + python -m pip install --editable ".[all]" + pre-commit install findfrench: ## Extract phrases and update the French translation catalog (this doesn't translate) - python setup.py extract_messages - python setup.py update_catalog -l fr + pybabel extract -o src/xscen/data/messages.pot --omit-header --input-dirs=src/xscen/ + pybabel update -l fr -D xscen -i src/xscen/data/messages.pot -d src/xscen/data/ --omit-header --no-location translate: ## Compile the translation catalogs. - python setup.py compile_catalog + pybabel compile -f -D xscen -d src/xscen/data/ + +MO_LAST_COMMIT = $(shell git log -n 1 --pretty=format:%H -- src/xscen/data/fr/LC_MESSAGES/xscen.mo) +PO_LAST_COMMIT = $(shell git log -n 1 --pretty=format:%H -- src/xscen/data/fr/LC_MESSAGES/xscen.po) +checkfrench: ## Error if the catalog could be update or if the compilation is older than the catalog. + rm -f .check_messages.pot + pybabel extract -o .check_messages.pot --omit-header --input-dirs=src/xscen/ --no-location + pybabel update -l fr -D xscen -i .check_messages.pot -d src/xscen/data/ --omit-header --check + rm -f .check_messages.pot + # Last commit that touched the PO file must be an ancestor of the last that touched the MO + if git merge-base --is-ancestor $(PO_LAST_COMMIT) $(MO_LAST_COMMIT); then echo "ok"; else echo "Compilation is older than translations. Please compile with 'make translate'."; exit 1; fi diff --git a/conda/xscen/meta.yaml b/conda/xscen/meta.yaml deleted file mode 100644 index f1bfff33..00000000 --- a/conda/xscen/meta.yaml +++ /dev/null @@ -1,76 +0,0 @@ -{% set name = "xscen" %} - -package: - name: {{ name|lower }} - version: {{ environ['GIT_DESCRIBE_TAG'] }} - -source: - path: ../../ - -channels: - - conda-forge - - defaults - -build: - noarch: python - script: {{ PYTHON }} -m pip install . -vv - number: 0 - -requirements: - host: - - python >=3.9 - - pip - run: - - cartopy - - cftime - - cf_xarray >=0.7.6 - - clisops >=0.10 - - dask-core - - flox - - fsspec <2023.10.0 - - geopandas - - h5netcdf - - h5py - - intake-esm >=2023.07.07 - - matplotlib - - netCDF4 - - numcodecs - - numpy - - pandas >= 2 - - parse - - pyyaml - - rechunker - - scipy - - shapely >= 2 - - sparse <=0.14 - - toolz - - xarray <2023.11.0 # FIXME: Remove when pandas 2.2 is released and xclim is fixed. - - xclim >=0.46.0 - - xesmf >=0.7 - - zarr - # Opt - - nc-time-axis >=1.3.1 - - pyarrow >=1.0.0 - -test: - imports: - - xscen - commands: - - pip check - requires: - - pip - - pytest-json-report # Added due to a bug in esmpy. See: https://github.com/esmf-org/esmf/issues/115 - -about: - home: https://github.com/Ouranosinc/xscen - summary: A climate change scenario-building analysis framework, built with xclim/xarray. 
- license: Apache-2.0 - license_file: LICENSE - -extra: - recipe-maintainers: - # GitHub.com - - Zeitsperre - - RondeauG - - aulemahal - - juliettelavoie diff --git a/conda/xscen/recipe.yaml b/conda/xscen/recipe.yaml deleted file mode 100644 index 81494f77..00000000 --- a/conda/xscen/recipe.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Build recipe using `boa` build standard. Not suitable for conda-forge. See: https://github.com/mamba-org/boa - -context: - name: xscen - version: 0.5.0 - -package: - name: '{{ name|lower }}' - version: '{{ version }}' - -source: - url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/xscen-{{ version }}.tar.gz - sha256: f31df2cb52e87dd82d2fc7d340788e4edf14abccf04685a9249a2067594b721a - -build: - noarch: python - script: '{{ PYTHON }} -m pip install . -vv' - number: 1 - -requirements: - host: - - python >=3.9 - - pip - run: - - cartopy - - cftime - - cf_xarray >=0.7.6 - - clisops >=0.10 - - dask-core - - flox - - fsspec <2023.10.0 - - geopandas - - h5netcdf - - h5py - - intake-esm >=2023.07.07 - - matplotlib - - netCDF4 - - numcodecs - - numpy - - pandas >= 2 - - parse - - pyyaml - - rechunker - - scipy - - shapely >= 2 - - sparse <=0.14 - - toolz - - xarray <2023.11.0 # FIXME: Remove when pandas 2.2 is released and xclim is fixed. - - xclim >=0.46.0 - - xesmf >=0.7 - - zarr - # Opt - - nc-time-axis >=1.3.1 - - pyarrow >=1.0.0 - -test: - imports: - - xscen - commands: - - pip check - requires: - - pip - - pytest-json-report # Added due to a bug in esmpy. See: https://github.com/esmf-org/esmf/issues/115 - -about: - home: https://github.com/Ouranosinc/xscen - summary: A climate change scenario-building analysis framework, built with xclim/xarray. - license: Apache-2.0 - license_file: LICENSE - -extra: - recipe-maintainers: - # Anaconda.org - - Zeitsperre - - aule diff --git a/docs/changelog.rst b/docs/changelog.rst new file mode 100644 index 00000000..565b0521 --- /dev/null +++ b/docs/changelog.rst @@ -0,0 +1 @@ +.. include:: ../CHANGELOG.rst diff --git a/docs/changes.rst b/docs/changes.rst deleted file mode 100644 index d9e113ec..00000000 --- a/docs/changes.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../CHANGES.rst diff --git a/docs/conf.py b/docs/conf.py index 4284498c..20e10c3b 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -141,10 +141,8 @@ # templates_path = ['_templates'] # The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = [".rst"] +# You can specify multiple suffix as a dictionary of suffix: filetype +source_suffix = {'.rst': 'restructuredtext'} # The master toctree document. master_doc = "index" @@ -213,15 +211,15 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". +if not os.path.exists("_static"): + os.makedirs("_static") html_static_path = ["_static"] - # -- Options for HTMLHelp output --------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = "xscendoc" - # -- Options for LaTeX output ------------------------------------------ latex_elements = { @@ -252,14 +250,12 @@ ), ] - # -- Options for manual page output ------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [(master_doc, "xscen", "xscen Documentation", [author], 1)] - # -- Options for Texinfo output ---------------------------------------- # Grouping the document tree into Texinfo files. List of tuples diff --git a/docs/index.rst b/docs/index.rst index 6e316cab..a46a2ea8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -32,8 +32,9 @@ Features templates api contributing + releasing authors - changes + changelog security .. toctree:: diff --git a/docs/installation.rst b/docs/installation.rst index fe8e93ce..9dee7071 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -2,6 +2,20 @@ Installation ============ +We strongly recommend installing xscen in an Anaconda Python environment. +Furthermore, due to the complexity of some packages, the default dependency solver can take a long time to resolve the environment. +If `mamba` is not already your default solver, consider running the following commands in order to speed up the process: + + .. code-block:: console + + conda install -n base conda-libmamba-solver + conda config --set solver libmamba + +If you don't have `pip`_ installed, this `Python installation guide`_ can guide you through the process. + +.. _pip: https://pip.pypa.io +.. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ + Official Sources ---------------- @@ -11,41 +25,76 @@ Because of some packages being absent from PyPI (such as `xESMF`), we strongly r .. code-block:: console - $ conda install -c conda-forge xscen + conda install -c conda-forge xscen + +This is the preferred method to install xscen, as it will always install the most recent stable release. .. note:: - If you are unable to install the package due to missing dependencies, ensure that `conda-forge` is listed as a source in your `conda` configuration: `$ conda config --add channels conda-forge`! + If you are unable to install the package due to missing dependencies, ensure that `conda-forge` is listed as a source in your `conda` configuration: `conda config --add channels conda-forge`! If for some reason you wish to install the `PyPI` version of `xscen` into an existing Anaconda environment (*not recommended*), this can be performed with: .. code-block:: console - $ python -m pip install xscen + python -m pip install xscen Development Installation (Anaconda + pip) ----------------------------------------- -For development purposes, we provide the means for generating a conda environment with the latest dependencies in an `environment.yml` file at the top-level of the `Github repo`_. +For development purposes, we provide the means for generating a conda environment with the latest dependencies in an `environment.yml` file at the top-level of the `Github repo `_. -In order to get started, first clone the repo locally: +The sources for xscen can be downloaded from the `Github repo`_. -.. code-block:: console +#. Download the source code from the `Github repo`_ using one of the following methods: - $ git clone git@github.com:Ouranosinc/xscen.git + * Clone the public repository: -Then you can create the environment and install the package: + .. code-block:: console -.. code-block:: console + git clone git@github.com:Ouranosinc/xscen.git - $ cd xscen - $ conda env create -f environment.yml + * Download the `tarball `_: -Finally, perform an `--editable` install of xscen and compile the translation catalogs: + .. code-block:: console -.. code-block:: console + curl -OJL https://github.com/Ouranosinc/xscen/tarball/main + +#. 
Once you have a copy of the source, you can install it with: + + .. code-block:: console + + conda env create -f environment-dev.yml + conda activate xscen-dev + make dev + + If you are on Windows, replace the ``make dev`` command with the following: + + .. code-block:: console + + python -m pip install -e .[dev] + + Even if you do not intend to contribute to `xscen`, we favor using `environment-dev.yml` over `environment.yml` because it includes additional packages that are used to run all the examples provided in the documentation. + If for some reason you wish to install the `PyPI` version of `xscen` into an existing Anaconda environment (*not recommended if requirements are not met*), only run the last command above. + +#. When new changes are made to the `Github repo`_, if using a clone, you can update your local copy using the following commands from the root of the repository: + + .. code-block:: console + + git fetch + git checkout main + git pull origin main + conda env update -n xscen-dev -f environment-dev.yml + conda activate xscen-dev + make dev + + These commands should work most of the time, but if big changes are made to the repository, you might need to remove the environment and create it again. + +#. Finally, in order to compile the translation catalogs, run the following command from the root of the repository: + + .. code-block:: console - $ python -m pip install -e . - $ make translate + python -m pip install -e . + make translate .. _Github repo: https://github.com/Ouranosinc/xscen diff --git a/docs/notebooks/2_getting_started.ipynb b/docs/notebooks/2_getting_started.ipynb index 0fdad2e3..6689334e 100644 --- a/docs/notebooks/2_getting_started.ipynb +++ b/docs/notebooks/2_getting_started.ipynb @@ -549,13 +549,7 @@ "\n", "Masks can be used on both the original grid and the destination grid to ignore certain grid cells during the regridding process. These masks follow the `ESMF` convention, meaning that the mask is a variable within the Dataset, named *mask* and comprised of 0 and 1.\n", "\n", - "`xs.create_mask` will create an adequate DataArray, following the instructions given by *mask_args*. In the case of variables that have a time component, the first timestep will be chosen.\n", - "\n", - " mask_args:\n", - " - 'variable' (optional)\n", - " - 'where_operator' (optional)\n", - " - 'where_threshold' (optional)\n", - " - 'mask_nans': bool" + "`xs.create_mask` will create an adequate DataArray, following the instructions given to the function. In the case of variables that have a time component, the first timestep will be chosen." 
] }, { @@ -567,14 +561,6 @@ }, "outputs": [], "source": [ - "# Will mask all pixels that do not match these requirements (at least 25% land)\n", - "mask_args = {\n", - " \"variable\": \"sftlf\",\n", - " \"where_operator\": \">\",\n", - " \"where_threshold\": 25,\n", - " \"mask_nans\": True,\n", - "}\n", - "\n", "# to_dataset() will open the dataset, as long as the search() gave a single result.\n", "ds_example = pcat.search(\n", " id=\"CMIP6_ScenarioMIP_NCC_NorESM2-MM_ssp245_r1i1p1f1_example-region\",\n", @@ -582,8 +568,10 @@ " variable=\"sftlf\",\n", ").to_dataset()\n", "\n", - "# Masking function\n", - "ds_example[\"mask\"] = xs.create_mask(ds_example, mask_args=mask_args)" + "# Will mask all pixels that do not match these requirements (at least 25% land)\n", + "ds_example[\"mask\"] = xs.create_mask(\n", + " ds_example, variable=\"sftlf\", where_operator=\">\", where_threshold=25, mask_nans=True\n", + ")" ] }, { @@ -716,7 +704,7 @@ " pcat.search(\n", " id=ds.attrs[\"cat:id\"], processing_level=\"extracted\", variable=\"sftlf\"\n", " ).to_dataset(),\n", - " mask_args=mask_args,\n", + " **mask_args,\n", " )\n", "\n", " # Regridding function\n", @@ -1389,7 +1377,7 @@ "metadata": {}, "outputs": [], "source": [ - "convert_calendar_kwargs = {\"target\": \"standard\"}\n", + "convert_calendar_kwargs = {\"calendar\": \"standard\"}\n", "missing_by_var = {\"tas\": \"interpolate\"}" ] }, @@ -1404,14 +1392,11 @@ "\n", "It is possible to write a list of attributes to remove with `attrs_to_remove`, or a list of attributes to keep and remove everything else with `remove_all_attrs_except`. Both take the shape of a dictionnary where the keys are the variables (and 'global' for global attrs) and the values are the list.\n", "\n", - "The element of the list can be exact matches for the attribute names or use the same regex matching rules as `intake_esm`:\n", - "\n", - "- ending with a '*' means checks if the substring is contained in the string\n", - "- starting with a '^' means check if the string starts with the substring.\n", + "The element of the list can be exact matches for the attribute names or use regex matching rules (using a `fullmatch`):\n", "\n", "Attributes can also be added to datasets using `add_attrs`. This is a dictionary where the keys are the variables and the values are a another dictionary of attributes.\n", "\n", - "It is also possible to modify the catalogue prefix 'cat:' by a new string with `change_attr_prefix`. Don't use this if this is not the last step of your workflow.\n" + "It is also possible to modify the catalogue prefix 'cat:' by a new string with `change_attr_prefix`. 
Don't use this if this is not the last step of your workflow, as it may break some functions that rely on those prefixes to find the right dataset attributes.\n" ] }, { @@ -1422,15 +1407,15 @@ "outputs": [], "source": [ "attrs_to_remove = {\n", - " \"tas\": [\"name*\"]\n", + " \"tas\": [\".*name.*\"]\n", "} # remove tas attrs that contain the substring 'name'\n", "remove_all_attrs_except = {\n", - " \"global\": [\"^cat:\"]\n", + " \"global\": [\"cat:.*\"]\n", "} # remove all the global attrs EXCEPT for the one starting with cat:\n", "add_attrs = {\n", " \"tas\": {\"notes\": \"some crucial information\"}\n", "} # add a new tas attribute named 'notes' with value 'some crucial information'\n", - "change_attr_prefix = \"dataset:\" # change /cat to dataset:" + "change_attr_prefix = \"dataset:\" # change 'cat': to 'dataset:'" ] }, { @@ -1463,14 +1448,12 @@ "metadata": {}, "outputs": [], "source": [ - "from xclim.core.calendar import get_calendar\n", - "\n", "# Inspect calendars and the interpolated values\n", - "print(\"Initial calendar: \", get_calendar(ds.time))\n", + "print(\"Initial calendar: \", ds.time.dt.calendar)\n", "print(ds.time.sel(time=slice(\"2000-02-28\", \"2000-03-01\")).values)\n", "print(ds.tas.sel(time=slice(\"2000-02-28\", \"2000-03-01\")).isel(lat=1, lon=1).values)\n", "\n", - "print(\"Final calendar: \", get_calendar(ds_clean.time))\n", + "print(\"Final calendar: \", ds_clean.time.dt.calendar)\n", "print(ds_clean.time.sel(time=slice(\"2000-02-28\", \"2000-03-01\")).values)\n", "print(\n", " ds_clean.tas.sel(time=slice(\"2000-02-28\", \"2000-03-01\")).isel(lat=1, lon=1).values\n", @@ -1500,7 +1483,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.12.5" } }, "nbformat": 4, diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index abd7b709..cf6a2576 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -53,7 +53,9 @@ "}\n", "\n", "for d in datasets:\n", - " ds = open_dataset(datasets[d]).isel(lon=slice(0, 4), lat=slice(0, 4))\n", + " ds = open_dataset(datasets[d], branch=\"v2023.12.14\").isel(\n", + " lon=slice(0, 4), lat=slice(0, 4)\n", + " )\n", " ds = xs.climatological_op(\n", " ds,\n", " op=\"mean\",\n", diff --git a/docs/releasing.rst b/docs/releasing.rst new file mode 100644 index 00000000..b3edb0ef --- /dev/null +++ b/docs/releasing.rst @@ -0,0 +1,126 @@ +========= +Releasing +========= + +Deployment +---------- + +A reminder for the **maintainers** on how to deploy. This section is only relevant when producing a new point release for the package. + +.. warning:: + + It is important to be aware that any changes to files found within the ``src/xscen`` folder (with the exception of ``src/xscen/__init__.py``) will trigger the ``bump-version.yml`` workflow. Be careful not to commit changes to files in this folder when preparing a new release. + +#. Create a new branch from `main` (e.g. `release-0.2.0`). +#. Update the `CHANGELOG.rst` file to change the `Unreleased` section to the current date. +#. Bump the version in your branch to the next version (e.g. `v0.1.0 -> v0.2.0`): + + .. code-block:: console + + bump-my-version bump minor # In most cases, we will be releasing a minor version + bump-my-version bump release # This will update the version strings to drop the `dev` suffix + git push + +#. Create a pull request from your branch to `main`. +#. Once the pull request is merged, create a new release on GitHub. 
On the `main` branch, run: + + .. code-block:: console + + git tag v0.2.0 + git push --tags + + This will trigger a GitHub workflow to build the package and upload it to TestPyPI. At the same time, the GitHub workflow will create a draft release on GitHub. Assuming that the workflow passes, the final release can then be published on GitHub by finalizing the draft release. + +#. Once the release is published, the `publish-pypi.yml` workflow will go into an `awaiting approval` mode on GitHub Actions. Only authorized users may approve this workflow (notifications will be sent) to trigger the upload to PyPI. + +.. warning:: + + Uploads to PyPI can **never** be overwritten. If you make a mistake, you will need to bump the version and re-release the package. If the package uploaded to PyPI is broken, you should modify the GitHub release to mark the package as broken, as well as yank the package (mark the version "broken") on PyPI. + +Packaging +--------- + +When a new version has been minted (features have been successfully integrated, and test coverage and stability are adequate), maintainers should update the pip-installable package (wheel and source release) on PyPI as well as the binary on conda-forge. + +The simple approach +~~~~~~~~~~~~~~~~~~~ + +The simplest approach to packaging for general support (pip wheels) requires that `flit` be installed: + + .. code-block:: console + + python -m pip install flit + +From the command line on your Linux distribution, simply run the following from the clone's main dev branch: + + .. code-block:: console + + # To build the packages (sources and wheel) + make dist + + # To upload to PyPI + make release + +The new version, based on the version checked out, will now be available via `pip` (`pip install xscen`). + +Releasing on conda-forge +~~~~~~~~~~~~~~~~~~~~~~~~ + +Initial Release +^^^^^^^^^^^^^^^ + +Before preparing an initial release on conda-forge, we *strongly* suggest consulting the following links: + * https://conda-forge.org/docs/maintainer/adding_pkgs.html + * https://github.com/conda-forge/staged-recipes + +In order to create a new conda build recipe, to be used when proposing packages to the conda-forge repository, we strongly suggest using the `grayskull` tool: + + .. code-block:: console + + python -m pip install grayskull + grayskull pypi xscen + +For more information on `grayskull`, please see the following link: https://github.com/conda/grayskull + +Before updating the main conda-forge recipe, we echo the conda-forge documentation and *strongly* suggest performing the following checks: + * Ensure that dependencies and dependency versions correspond with those of the tagged version, with open or pinned versions for the `host` requirements. + * If possible, configure tests within the conda-forge build CI (e.g. `imports: xscen`, `commands: pytest xscen`). + +Subsequent releases +^^^^^^^^^^^^^^^^^^^ + +If the conda-forge feedstock recipe is built from PyPI, then when a new release is published on PyPI, `regro-cf-autotick-bot` will open Pull Requests automatically on the conda-forge feedstock. It is up to the conda-forge feedstock maintainers to verify that the package is building properly before merging the Pull Request to the main branch. + +Building sources for wide support with `manylinux` image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +
+.. warning:: + This section is for building source files that link to or provide links to C/C++ dependencies. 
+ It is not necessary to perform the following when building pure Python packages. + +In order to do ensure best compatibility across architectures, we suggest building wheels using the `PyPA`'s `manylinux` docker images (at time of writing, we endorse using `manylinux_2_24_x86_64`). + +With `docker` installed and running, begin by pulling the image: + + .. code-block:: console + + sudo docker pull quay.io/pypa/manylinux_2_24_x86_64 + +From the xscen source folder we can enter into the docker container, providing access to the `src/xscen` source files by linking them to the running image: + + .. code-block:: console + + sudo docker run --rm -ti -v $(pwd):/src/xscen -w /src/xscen quay.io/pypa/manylinux_2_24_x86_64 bash + +Finally, to build the wheel, we run it against the provided Python3.9 binary: + + .. code-block:: console + + /opt/python/cp39-cp39m/bin/python -m build --sdist --wheel + +This will then place two files in `xscen/dist/` ("xscen-1.2.3-py3-none-any.whl" and "xscen-1.2.3.tar.gz"). +We can now leave our docker container (`exit`) and continue with uploading the files to PyPI: + + .. code-block:: console + + python -m twine upload dist/* diff --git a/environment-dev.yml b/environment-dev.yml index cd20dde0..9d4434da 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -2,11 +2,11 @@ name: xscen-dev channels: - conda-forge dependencies: - - python >=3.9,<3.13 - # Don't forget to sync changes between environment.yml, environment-dev.yml, and setup.py! + - python >=3.10,<3.13 + # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! # Also consider updating the list in xs.utils.show_versions if you add a new package. # Main packages - - cartopy + - cartopy >=0.23.0 - cftime - cf_xarray >=0.7.6 - clisops >=0.10 @@ -17,32 +17,32 @@ dependencies: - h5netcdf - h5py - intake-esm >=2023.07.07 - - matplotlib + - matplotlib >=3.6 - netCDF4 - numcodecs - - numpy + - numpy >=1.24 - pandas >=2.2 - parse - pyyaml - rechunker - - scipy + - scipy >=1.10 - shapely >=2.0 - sparse - toolz - - xarray >=2023.11.0 - - xclim >=0.48.2 + - xarray >=2023.11.0, !=2024.6.0 + - xclim >=0.52.2, <0.53 - xesmf >=0.7 - - zarr + - zarr >=2.13 # Opt - nc-time-axis >=1.3.1 - pyarrow >=10.0.1 # Dev - babel - - black ==24.2.0 + - black ==24.8.0 - blackdoc ==0.3.9 - - bump-my-version >=0.18.3 -# - coverage>=6.2.2,<7.0.0 -# - coveralls>=3.3.1 + - bump-my-version >=0.25.1 + - coverage>=7.5.0 + - coveralls>=4.0.1 - flake8 >=6.1.0 - flake8-rst-docstrings>=0.3.0 - ipykernel @@ -51,29 +51,28 @@ dependencies: - jupyter_client - nbsphinx - nbval + - numpydoc >=1.8.0 - pandoc - pooch - - pre-commit >=3.3.2 - - pytest >=7.3.1 -# - pytest-cov >=4.0.0 - - ruff >=0.1.0 - - sphinx + - pre-commit >=3.5.0 + - pytest >=8.3.2 + - pytest-cov >=5.0.0 + - pytest-xdist >=3.2.0 + - ruff >=0.5.7 + - setuptools >=65.0.0 + - setuptools-scm >=8.0.0 + - sphinx >=7.0.0 - sphinx-autoapi - sphinx-rtd-theme >=1.0 - sphinxcontrib-napoleon - sphinx-codeautolink - sphinx-copybutton - sphinx-mdinclude - - watchdog >=3.0.0 + - watchdog >=4.0.0 - xdoctest # Testing - - tox >=4.5.1 + - tox >=4.17.1 + - tox-gh >=1.3.2 # packaging - - build + - conda-build - wheel - - pip - - pip: - # coverage is not available in conda-forge for Python3.12 - - coverage>=6.2.2,<7.0.0 - - coveralls>=3.3.1 - - pytest-cov >=4.0.0 diff --git a/environment.yml b/environment.yml index b54bd875..e5336ab8 100644 --- a/environment.yml +++ b/environment.yml @@ -2,11 +2,11 @@ name: xscen channels: - conda-forge dependencies: - - python 
>=3.9,<3.13 - # Don't forget to sync changes between environment.yml, environment-dev.yml, and setup.py! + - python >=3.10,<3.13 + # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! # Also consider updating the list in xs.utils.show_versions if you add a new package. # Main packages - - cartopy + - cartopy >=0.23.0 - cftime - cf_xarray >=0.7.6 - clisops >=0.10 @@ -17,24 +17,25 @@ dependencies: - h5netcdf - h5py - intake-esm >=2023.07.07 - - matplotlib + - matplotlib >=3.6 - netCDF4 - numcodecs - - numpy + - numpy >=1.24 - pandas >=2.2 - parse - pyyaml - rechunker - - scipy + - scipy >=1.10 - shapely >=2.0 - sparse - toolz - - xarray >=2023.11.0 - - xclim >=0.48.2 + - xarray >=2023.11.0, !=2024.6.0 + - xclim >=0.52.2, <0.53 - xesmf >=0.7 - - zarr - # To install from source and get translations - - babel + - zarr >=2.13 + # To install from source + - setuptools >=65.0.0 + - setuptools-scm >=8.0.0 # Opt - nc-time-axis >=1.3.1 - pyarrow >=10.0.1 diff --git a/pyproject.toml b/pyproject.toml index 68ffa4aa..f88c3f5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,7 @@ [build-system] requires = [ - "setuptools >=60", + "setuptools >=65.0.0", "setuptools-scm >=8.0", - "babel", "wheel" ] build-backend = "setuptools.build_meta" @@ -10,12 +9,17 @@ build-backend = "setuptools.build_meta" [project] name = "xscen" authors = [ - {name = "Gabriel Rondeau-Genesse", email = "rondeau-genesse.gabriel@ouranos.ca"} + {name = "Gabriel Rondeau-Genesse", email = "rondeau-genesse.gabriel@ouranos.ca"}, + {name = "Pascal Bourgault", email = "bourgault.pascal@ouranos.ca"}, + {name = "Juliette Lavoie", email = "lavoie.juliette@ouranos.ca"} +] +maintainers = [ + {name = "Gabriel Rondeau-Genesse", email = "rondeau-genesse.gabriel@ouranos.ca"}, + {name = "Trevor James Smith", email = "smith.trevorj@ouranos.ca"} ] -maintainers = [] description = "A climate change scenario-building analysis framework, built with xclim/xarray." readme = "README.rst" -requires-python = ">=3.9.0" +requires-python = ">=3.10.0" keywords = ["xscen"] classifiers = [ "Development Status :: 4 - Beta", @@ -24,19 +28,18 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Natural Language :: English", "Operating System :: OS Independent", - "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + # "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Topic :: Scientific/Engineering :: Atmospheric Science" ] dynamic = ["version"] dependencies = [ - "babel", - "cartopy", + "cartopy >=0.23.0", "cftime", "cf_xarray >=0.7.6", "clisops >=0.10", @@ -45,48 +48,50 @@ dependencies = [ "fsspec", "geopandas", "h5netcdf", - "h5py", - "intake-esm >=2023.07.07,=2023.07.07; python_version >= '3.10'", - "matplotlib", + "h5py", # <3.11", # writing and reading with engine h5netcdf was broken + "intake-esm >=2023.07.07", + "matplotlib >=3.6", "netCDF4", "numcodecs", - "numpy", + "numpy >=1.24", "pandas >=2.2", "parse", # Used when opening catalogs. 
"pyarrow>=10.0.1", "pyyaml", "rechunker", - "scipy", + "scipy >=1.10", "shapely >=2.0", "sparse", "toolz", - "xarray >=2023.11.0", - "xclim >=0.48.2", - "zarr" + "xarray >=2023.11.0, !=2024.6.0", + "xclim >=0.52.2, <0.53", + "zarr >=2.13" ] [project.optional-dependencies] dev = [ # Dev tools and testing - "pip >=23.3.0", - "black ==24.4.2", + "pip >=24.2.0", + "babel", + "black[jupyter] ==24.8.0", "blackdoc ==0.3.9", - "bump-my-version >=0.18.3", - "coverage >=6.2.2,<8.0.0", - "coveralls >=3.3.1", - "flake8-alphabetize >=0.0.21", + "bump-my-version >=0.26.0", + "coverage >=7.5.0", + "coveralls >=4.0.1", + "flake8 >=7.1.1", "flake8-rst-docstrings >=0.3.0", - "flake8 >=6.1.0", "isort ==5.13.2", + "mypy", + "numpydoc >=1.8.0", "pooch", "pre-commit >=3.3.2", - "pytest-cov >=4.0.0", - "pytest >=7.3.1", - "ruff >=0.1.0", - "tox >=4.5.1", - "watchdog >=3.0.0", + "pytest-cov >=5.0.0", + "pytest >=8.3.2", + "pytest-xdist[psutil] >=3.2.0", + "ruff >=0.5.7", + "tox >=4.18.0", + "watchdog >=4.0.0", "xdoctest" ] docs = [ @@ -96,7 +101,7 @@ docs = [ "jupyter_client", "nbsphinx", "nbval", - "sphinx", + "sphinx >=7.0.0", "sphinx-autoapi", "sphinx-codeautolink", "sphinx-copybutton", @@ -108,11 +113,12 @@ docs = [ extra = [ "xesmf>=0.7" ] +all = ["xscen[dev]", "xscen[docs]", "xscen[extra]"] [project.urls] -"About Ouranos" = "https://www.ouranos.ca/en/" -"Changelog" = "https://xscen.readthedocs.io/en/stable/changes.html" "Homepage" = "https://xscen.readthedocs.io/" +"Changelog" = "https://xscen.readthedocs.io/en/stable/changelog.html" +"About Ouranos" = "https://ouranos.ca/en/" "Issue tracker" = "https://github.com/Ouranosinc/xscen/issues" "Source" = "https://github.com/Ouranosinc/xscen" @@ -120,19 +126,19 @@ extra = [ [tool.black] target-version = [ - "py39", "py310", "py311", - "py312" + "py312", + "py313" ] [tool.bumpversion] -current_version = "0.9.0" +current_version = "0.10.1-dev.1" commit = true commit_args = "--no-verify" tag = false tag_name = "v{new_version}" -allow_dirty = false +allow_dirty = true parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)(\\-(?P[a-z]+)(\\.(?P\\d+)))?" serialize = [ "{major}.{minor}.{patch}-{release}.{build}", @@ -140,7 +146,7 @@ serialize = [ ] [[tool.bumpversion.files]] -filename = "xscen/__init__.py" +filename = "src/xscen/__init__.py" search = "__version__ = \"{current_version}\"" replace = "__version__ = \"{new_version}\"" @@ -159,43 +165,78 @@ values = [ "release" ] +[tool.coverage.paths] +source = ["src/xscen/", "*/site-packages/xscen/"] + [tool.coverage.run] +omit = ["docs/notebooks/*.ipynb", "tests/*.py", "src/xscen/reduce.py"] # FIXME: Remove xscen/reduce.py when it's fully deleted. relative_files = true -include = ["xscen/*"] -omit = ["docs/notebooks/*.ipynb", "tests/*.py", "xscen/reduce.py"] # FIXME: Remove xscen/reduce.py when it's fully deleted. +source = ["xscen"] [tool.isort] append_only = true known_first_party = "xscen" profile = "black" -py_version = 39 +py_version = 310 [tool.mypy] -python_version = 3.9 +files = "." 
+python_version = 3.10 show_error_codes = true +strict = true +warn_no_return = true warn_return_any = true +warn_unreachable = true warn_unused_configs = true [[tool.mypy.overrides]] -module = [] +module = [ + # Don't require test functions to include types + "tests.*" +] +allow_untyped_defs = true +disable_error_code = "attr-defined" ignore_missing_imports = true +[tool.numpydoc_validation] +checks = [ + "all", # report on all checks, except the below + "EX01", + "SA01", + "ES01" +] +# remember to use single quotes for regex in TOML +exclude = [ + # don't report on objects that match any of these regex + '\.undocumented_method$', + '\.__repr__$' +] +override_SS05 = [ + # override SS05 to allow docstrings starting with these words + '^Process ', + '^Assess ', + '^Access ' +] + [tool.pytest.ini_options] addopts = [ "--color=yes", "--cov=xscen", "--ignore-glob='*.ipynb_checkpoints'", + "--strict-config", "--strict-markers", - "--verbose" + "--verbose", + "--numprocesses=0", + "--maxprocesses=8", + "--dist=worksteal" ] filterwarnings = ["ignore::UserWarning"] testpaths = "tests" markers = ["requires_netcdf: marks tests that require netcdf files to run"] [tool.ruff] -src = ["xscen"] +src = ["src/xscen"] line-length = 150 -target-version = "py39" exclude = [ ".eggs", ".git", @@ -204,20 +245,43 @@ exclude = [ ] [tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false line-ending = "auto" [tool.ruff.lint] +extend-select = [ + "RUF022" # unsorted-dunder-all +] ignore = [ - "D205", - "D400", - "D401" + "COM", # commas + "D205", # blank-line-after-summary + "D400", # ends-in-period + "D401", # non-imperative-mood + # The following are disabled because the codebase is not yet compliant. + "N801", # invalid-class-name + "N806", # non-lowercase-variable-in-function + "PERF203", # try-except-in-loop + "PERF401", # manual-list-comprehension + "S110" # try-except-pass ] +preview = true select = [ - "C9", - "D", - "E", - "F", - "W" + "BLE", # blind-except + "C90", # mccabe-complexity + "D", # docstrings + "E", # pycodestyle errors + "FLY002", # static-join-to-fstring + "G", # logging-format + "N", # naming conventions + "PERF", # iterator performance + "PTH", # pathlib + "RUF010", # explicit-f-string-type-conversion + "RUF013", # implicit-optional + "S", # bandit + "UP", # python version conventions + "W" # pycodestyle warnings ] [tool.ruff.lint.flake8-bandit] @@ -227,15 +291,16 @@ check-typed-exception = true known-first-party = ["xscen"] case-sensitive = true detect-same-package = false -lines-after-imports = 1 +lines-after-imports = 2 no-lines-before = ["future", "standard-library"] [tool.ruff.lint.mccabe] max-complexity = 15 [tool.ruff.lint.per-file-ignores] -"xscen/**/__init__.py" = ["F401", "F403"] -"tests/**/*.py" = ["D100", "D101", "D102", "D103"] +"docs/**/*.py" = ["E402"] +"src/xscen/**/__init__.py" = ["F401", "F403"] +"tests/**/*.py" = ["D100", "D101", "D102", "D103", "S101"] [tool.ruff.lint.pycodestyle] max-doc-length = 180 @@ -251,14 +316,14 @@ include-package-data = true version = {attr = "xscen.__version__"} [tool.setuptools.packages.find] -where = ["."] +where = ["src"] include = ["xscen"] # [tool.setuptools.packages.find] # include = [ # ".zenodo.json", # "AUTHORS.rst", -# "CHANGES.rst", +# "CHANGELOG.rst", # "CONTRIBUTING.rst", # "LICENSE", # "Makefile", @@ -274,15 +339,13 @@ include = ["xscen"] # "docs/notebooks/samples/*.yml", # "environment.yml", # "environment-dev.yml", -# "setup.cfg", -# "setup.py", +# "src/xscen/**/*.py", +# 
"src/xscen/**/*.yml", +# "src/xscen/CVs/*.json", +# "src/xscen/data/*.csv", +# "src/xscen/data/**/*.mo", +# "src/xscen/data/**/*.po", # "tests/*.py", -# "xscen/**/*.py", -# "xscen/**/*.yml", -# "xscen/CVs/*.json", -# "xscen/data/*.csv", -# "xscen/data/**/*.mo", -# "xscen/data/**/*.po", # "tox.ini" # ] # exclude = [ diff --git a/scripts/global_tas_average_obs.ipynb b/scripts/global_tas_average_obs.ipynb index 43bec101..f43e07ed 100644 --- a/scripts/global_tas_average_obs.ipynb +++ b/scripts/global_tas_average_obs.ipynb @@ -27,12 +27,8 @@ }, "outputs": [], "source": [ - "import os\n", "import re\n", - "import sys\n", - "\n", - "os.environ[\"ESMFMKFILE\"] = os.path.join(sys.prefix, \"lib\", \"esmf.mk\")\n", - "from datetime import datetime, timedelta\n", + "from pathlib import Path\n", "\n", "import dask\n", "import matplotlib.pyplot as plt\n", @@ -58,7 +54,7 @@ "source": [ "# WMO's reference period\n", "ref_period = [1981, 2010]\n", - "# Tempertarue difference between the reference period and 1850-1900\n", + "# Temperature difference between the reference period and 1850-1900\n", "# See source. Computed according to the IPCC's AR6 WG1.\n", "ref_delta = 0.69\n", "\n", @@ -110,14 +106,17 @@ "outputs": [], "source": [ "# Get data\n", - "with open(\"Berkeley_data.txt\", \"wb\") as f:\n", + "file = Path(\"Berkeley_data.txt\")\n", + "\n", + "with file.open(\"wb\") as f:\n", " res = requests.get(\n", - " \"https://berkeley-earth-temperature.s3.us-west-1.amazonaws.com/Global/Land_and_Ocean_summary.txt\"\n", + " \"https://berkeley-earth-temperature.s3.us-west-1.amazonaws.com/Global/Land_and_Ocean_summary.txt\",\n", + " timeout=15,\n", " )\n", " f.write(res.content)\n", "\n", "df = pd.read_table(\n", - " \"Berkeley_data.txt\",\n", + " file,\n", " skiprows=58,\n", " usecols=[0, 1],\n", " names=[\"year\", \"temp\"],\n", @@ -127,20 +126,19 @@ "da = df.temp.to_xarray().assign_attrs(units=\"°C\")\n", "\n", "# Get global average for the reference period of the data\n", - "with open(\"Berkeley_data.txt\") as f:\n", + "with file.open(\"r\") as f:\n", " for line in f:\n", " if \"% Estimated Jan 1951-Dec 1980 global mean temperature (C)\" in line:\n", " data = re.search(r\"(\\d{2}.\\d{3})\", next(f))\n", " break\n", - "refAvg = float(data.groups()[0])\n", - "refAvg\n", + "ref_avg = float(data.groups()[0])\n", "\n", - "daAbs = da + refAvg\n", + "da_abs = da + ref_avg\n", "\n", - "daWMO = clean(da)\n", + "da_wmo = clean(da)\n", "\n", - "temps.append(daAbs.expand_dims(source=[\"Berkeley-Raw\"]))\n", - "temps.append(daWMO.expand_dims(source=[\"Berkeley\"]))" + "temps.append(da_abs.expand_dims(source=[\"Berkeley-Raw\"]))\n", + "temps.append(da_wmo.expand_dims(source=[\"Berkeley\"]))" ] }, { @@ -154,10 +152,10 @@ "source": [ "# A figure to look at it\n", "fig, ax = plt.subplots(figsize=(10, 3))\n", - "(daAbs - daAbs.sel(year=slice(1850, 1900)).mean()).plot(ax=ax, label=\"Raw\")\n", - "daWMO.plot(ax=ax, label=\"WMO\")\n", + "(da_abs - da_abs.sel(year=slice(1850, 1900)).mean()).plot(ax=ax, label=\"Raw\")\n", + "da_wmo.plot(ax=ax, label=\"WMO\")\n", "ax.set_title(\n", - " \"Global Average Temperature according to Bekerley - anomalies vs 1850-1900\"\n", + " \"Global Average Temperature according to Berkeley - anomalies vs 1850-1900\"\n", ")\n", "ax.set_xlabel(\"years\")\n", "ax.set_ylabel(\"[°C]\")\n", @@ -193,9 +191,9 @@ ")\n", "da = df[\"J-D\"].to_xarray().rename(Year=\"year\").rename(\"temp\").assign_attrs(units=\"°C\")\n", "\n", - "daWMO = clean(da)\n", + "da_wmo = clean(da)\n", "\n", - 
"temps.append(daWMO.expand_dims(source=[\"GISTEMPv4\"]))" + "temps.append(da_wmo.expand_dims(source=[\"GISTEMPv4\"]))" ] }, { @@ -231,9 +229,9 @@ " .assign_attrs(units=\"°C\")\n", ")\n", "\n", - "daWMO = clean(da)\n", + "da_wmo = clean(da)\n", "\n", - "temps.append(daWMO.expand_dims(source=[\"HadCRUT5\"]))" + "temps.append(da_wmo.expand_dims(source=[\"HadCRUT5\"]))" ] }, { @@ -266,9 +264,9 @@ "\n", "da = df.temp.to_xarray().assign_attrs(units=\"°C\")\n", "\n", - "daWMO = clean(da)\n", + "da_wmo = clean(da)\n", "\n", - "temps.append(daWMO.expand_dims(source=[\"NOAAGlobalTempv5\"]))" + "temps.append(da_wmo.expand_dims(source=[\"NOAAGlobalTempv5\"]))" ] }, { @@ -340,8 +338,8 @@ "da = da.assign_coords(time=da.time.dt.year).rename(time=\"year\")\n", "\n", "with ProgressBar():\n", - " daWMO = clean(da).load()\n", - "temps.append(daWMO.expand_dims(source=[\"JRA-55\"]))" + " da_wmo = clean(da).load()\n", + "temps.append(da_wmo.expand_dims(source=[\"JRA-55\"]))" ] }, { @@ -375,7 +373,7 @@ "outputs": [], "source": [ "# A figure to look at it\n", - "fig, ax = plt.subplots(figsize=(10, 3))\n", + "fig, ax = plt.subplots(figsize=(10, 3)) # noqa\n", "ds.plot(ax=ax, hue=\"source\")\n", "ax.set_title(\"Global Average Temperature - Obs\")\n", "ax.set_xlabel(\"years\")\n", @@ -436,8 +434,7 @@ "source": [ "db2 = xr.concat([db, ds2], \"simulation\")\n", "db2.attrs[\"description\"] = (\n", - " db.attrs[\"description\"]\n", - " + \" Observational datasets were also added following the WMO guidelines.\"\n", + " f\"{db.attrs['description']} Observational datasets were also added following the WMO guidelines.\"\n", ")" ] }, @@ -464,14 +461,6 @@ "source": [ "db2.tas.plot(hue=\"simulation\", add_legend=False)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 41eb2465..00000000 --- a/setup.cfg +++ /dev/null @@ -1,17 +0,0 @@ -[extract_messages] -output_file = xscen.pot -keywords = _ gettext ngettext - -[init_catalog] -domain = xscen -input_file = xscen.pot -output_dir = xscen/data - -[update_catalog] -domain = xscen -input_file = xscen.pot -output_dir = xscen/data - -[compile_catalog] -domain = xscen -directory = xscen/data diff --git a/setup.py b/setup.py deleted file mode 100644 index 127a38be..00000000 --- a/setup.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Custom installation process for xscen translations.""" - -from babel.messages.frontend import compile_catalog -from setuptools import setup -from setuptools.build_meta import * # noqa: F403, F401 -from setuptools.command.install import install - - -class InstallWithCompile(install): - """Injection of the catalog compilation in the installation process.""" - - def run(self): - """Install the package, but compile the i18n catalogs first.""" - compiler = compile_catalog(self.distribution) - option_dict = self.distribution.get_option_dict("compile_catalog") - compiler.domain = [option_dict["domain"][1]] - compiler.directory = option_dict["directory"][1] - compiler.run() - super().run() - - -setup( - cmdclass={"install": InstallWithCompile}, - message_extractors={"xscen": [("**.py", "python", None)]}, -) diff --git a/xscen/CVs/frequency_to_timedelta.json b/src/xscen/CVs/frequency_to_timedelta.json similarity index 100% rename from xscen/CVs/frequency_to_timedelta.json rename to src/xscen/CVs/frequency_to_timedelta.json diff --git a/xscen/CVs/frequency_to_xrfreq.json b/src/xscen/CVs/frequency_to_xrfreq.json 
similarity index 100% rename from xscen/CVs/frequency_to_xrfreq.json rename to src/xscen/CVs/frequency_to_xrfreq.json diff --git a/xscen/CVs/infer_resolution.json b/src/xscen/CVs/infer_resolution.json similarity index 100% rename from xscen/CVs/infer_resolution.json rename to src/xscen/CVs/infer_resolution.json diff --git a/xscen/CVs/resampling_methods.json b/src/xscen/CVs/resampling_methods.json similarity index 100% rename from xscen/CVs/resampling_methods.json rename to src/xscen/CVs/resampling_methods.json diff --git a/xscen/CVs/variable_names.json b/src/xscen/CVs/variable_names.json similarity index 100% rename from xscen/CVs/variable_names.json rename to src/xscen/CVs/variable_names.json diff --git a/xscen/CVs/xrfreq_to_frequency.json b/src/xscen/CVs/xrfreq_to_frequency.json similarity index 100% rename from xscen/CVs/xrfreq_to_frequency.json rename to src/xscen/CVs/xrfreq_to_frequency.json diff --git a/xscen/CVs/xrfreq_to_timedelta.json b/src/xscen/CVs/xrfreq_to_timedelta.json similarity index 100% rename from xscen/CVs/xrfreq_to_timedelta.json rename to src/xscen/CVs/xrfreq_to_timedelta.json diff --git a/xscen/__init__.py b/src/xscen/__init__.py similarity index 56% rename from xscen/__init__.py rename to src/xscen/__init__.py index ff741d6a..e0cc51ab 100644 --- a/xscen/__init__.py +++ b/src/xscen/__init__.py @@ -1,5 +1,23 @@ """A climate change scenario-building analysis framework, built with xclim/xarray.""" +################################################################################### +# Apache Software License 2.0 +# +# Copyright (c) 2024, Gabriel Rondeau-Genesse, Pascal Bourgault, Juliette Lavoie, Trevor James Smith +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################### + import warnings # Import the submodules @@ -51,13 +69,35 @@ __author__ = """Gabriel Rondeau-Genesse""" __email__ = "rondeau-genesse.gabriel@ouranos.ca" -__version__ = "0.9.0" +__version__ = "0.10.1-dev.1" def warning_on_one_line( message: str, category: Warning, filename: str, lineno: int, file=None, line=None ): - """Monkeypatch Reformat warning so that `warnings.warn` doesn't mention itself.""" + """ + Monkeypatch Reformat warning so that `warnings.warn` doesn't mention itself. + + Parameters + ---------- + message : str + The warning message. + category : Warning + The warning category. + filename : str + The filename where the warning was raised. + lineno : int + The line number where the warning was raised. + file : file + The file where the warning was raised. + line : str + The line where the warning was raised. + + Returns + ------- + str + The reformatted warning message. 
+ """ return f"{filename}:{lineno}: {category.__name__}: {message}\n" diff --git a/xscen/aggregate.py b/src/xscen/aggregate.py similarity index 88% rename from xscen/aggregate.py rename to src/xscen/aggregate.py index 7bf04dee..756e8bb5 100644 --- a/xscen/aggregate.py +++ b/src/xscen/aggregate.py @@ -8,7 +8,6 @@ from copy import deepcopy from pathlib import Path from types import ModuleType -from typing import Optional, Union import geopandas as gpd import numpy as np @@ -34,7 +33,6 @@ logger = logging.getLogger(__name__) __all__ = [ - "climatological_mean", "climatological_op", "compute_deltas", "produce_horizon", @@ -47,73 +45,15 @@ def _(s): return s -@parse_config -def climatological_mean( - ds: xr.Dataset, - *, - window: Optional[int] = None, - min_periods: Optional[int] = None, - interval: int = 1, - periods: Optional[Union[list[str], list[list[str]]]] = None, - to_level: Optional[str] = "climatology", -) -> xr.Dataset: - """Compute the mean over 'year' for given time periods, respecting the temporal resolution of ds. - - Parameters - ---------- - ds : xr.Dataset - Dataset to use for the computation. - window : int, optional - Number of years to use for the time periods. - If left at None and periods is given, window will be the size of the first period. - If left at None and periods is not given, the window will be the size of the input dataset. - min_periods : int, optional - For the rolling operation, minimum number of years required for a value to be computed. - If left at None and the xrfreq is either QS or AS and doesn't start in January, min_periods will be one less than window. - If left at None, it will be deemed the same as 'window'. - interval : int - Interval (in years) at which to provide an output. - periods : list of str or list of lists of str, optional - Either [start, end] or list of [start, end] of continuous periods to be considered. - This is needed when the time axis of ds contains some jumps in time. - If None, the dataset will be considered continuous. - to_level : str, optional - The processing level to assign to the output. - If None, the processing level of the inputs is preserved. - - Returns - ------- - xr.Dataset - Returns a Dataset of the climatological mean, by calling climatological_op with option op=='mean'. - - """ - warnings.warn( - "xs.climatological_mean is deprecated and will be abandoned in a future release. 
" - "Use xs.climatological_op with option op=='mean' instead.", - category=FutureWarning, - ) - return climatological_op( - ds, - op="mean", - window=window, - min_periods=min_periods, - stride=interval, - periods=periods, - rename_variables=False, - to_level=to_level, - horizons_as_dim=False, - ) - - @parse_config def climatological_op( # noqa: C901 ds: xr.Dataset, *, - op: Union[str, dict] = "mean", - window: Optional[int] = None, - min_periods: Optional[Union[int, float]] = None, + op: str | dict = "mean", + window: int | None = None, + min_periods: int | float | None = None, stride: int = 1, - periods: Optional[Union[list[str], list[list[str]]]] = None, + periods: list[str] | list[list[str]] | None = None, rename_variables: bool = True, to_level: str = "climatology", horizons_as_dim: bool = False, @@ -506,11 +446,11 @@ def _ulinregress(x, y, **kwargs): @parse_config def compute_deltas( # noqa: C901 ds: xr.Dataset, - reference_horizon: Union[str, xr.Dataset], + reference_horizon: str | xr.Dataset, *, - kind: Union[str, dict] = "+", + kind: str | dict = "+", rename_variables: bool = True, - to_level: Optional[str] = "deltas", + to_level: str | None = "deltas", ) -> xr.Dataset: """Compute deltas in comparison to a reference time period, respecting the temporal resolution of ds. @@ -702,13 +642,13 @@ def spatial_mean( # noqa: C901 ds: xr.Dataset, method: str, *, - spatial_subset: Optional[bool] = None, - call_clisops: Optional[bool] = False, - region: Optional[Union[dict, str]] = None, - kwargs: Optional[dict] = None, - simplify_tolerance: Optional[float] = None, - to_domain: Optional[str] = None, - to_level: Optional[str] = None, + spatial_subset: bool | None = None, + call_clisops: bool | None = False, + region: dict | str | None = None, + kwargs: dict | None = None, + simplify_tolerance: float | None = None, + to_domain: str | None = None, + to_level: str | None = None, ) -> xr.Dataset: """Compute the spatial mean using a variety of available methods. @@ -767,24 +707,6 @@ def spatial_mean( # noqa: C901 xarray.Dataset.mean, xarray.Dataset.interp, xesmf.SpatialAverager """ kwargs = kwargs or {} - if method == "mean": - warnings.warn( - "xs.spatial_mean with method=='mean' is deprecated and will be abandoned in a future release. " - "Use method=='cos-lat' instead for a more robust but similar method.", - category=FutureWarning, - ) - elif method == "interp_coord": - warnings.warn( - "xs.spatial_mean with method=='interp_coord' is deprecated. Use method=='interp_centroid' instead.", - category=FutureWarning, - ) - method = "interp_centroid" - if call_clisops: - warnings.warn( - "call_clisops has been renamed and is deprecated. Use spatial_subset instead.", - category=FutureWarning, - ) - spatial_subset = call_clisops if region == "global": region = { @@ -798,21 +720,6 @@ def spatial_mean( # noqa: C901 else: region["lon_bnds"] = [-180, 180] - if ( - (region is not None) - and (region["method"] in region) - and (isinstance(region[region["method"]], dict)) - ): - warnings.warn( - "You seem to be using a deprecated version of region. 
Please use the new formatting.", - category=FutureWarning, - ) - region = deepcopy(region) - if "buffer" in region: - region["tile_buffer"] = region.pop("buffer") - _kwargs = region.pop(region["method"]) - region.update(_kwargs) - if ( (region is not None) and (spatial_subset is None) @@ -833,14 +740,14 @@ def spatial_mean( # noqa: C901 ) if "units" not in ds.cf["latitude"].attrs: - logger.warning( - f"{ds.attrs.get('cat:id', '')}: Latitude does not appear to have units. Make sure that the computation is right." - ) + msg = f"{ds.attrs.get('cat:id', '')}: Latitude does not appear to have units. Make sure that the computation is right." + logger.warning(msg) elif ds.cf["latitude"].attrs["units"] != "degrees_north": - logger.warning( + msg = ( f"{ds.attrs.get('cat:id', '')}: Latitude units is '{ds.cf['latitude'].attrs['units']}', expected 'degrees_north'. " - f"Make sure that the computation is right." + "Make sure that the computation is right." ) + logger.warning(msg) if ((ds.cf["longitude"].min() < -160) & (ds.cf["longitude"].max() > 160)) or ( (ds.cf["longitude"].min() < 20) & (ds.cf["longitude"].max() > 340) @@ -873,19 +780,6 @@ def spatial_mean( # noqa: C901 f"weighted mean(dim={[d for d in ds.cf.axes['X'] + ds.cf.axes['Y']]}) using a 'cos-lat' approximation of areacella (in deg2)" ) - # This simply calls .mean() over the spatial dimensions - elif method == "mean": - if "dim" not in kwargs: - kwargs["dim"] = ds.cf.axes["X"] + ds.cf.axes["Y"] - - ds_agg = ds.mean(keep_attrs=True, **kwargs) - - # Prepare the History field - new_history = ( - f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " - f"xarray.mean(dim={kwargs['dim']}) - xarray v{xr.__version__}" - ) - # This calls .interp() to a pair of coordinates elif method == "interp_centroid": # Find the centroid @@ -896,7 +790,7 @@ def spatial_mean( # noqa: C901 kwargs[ds.cf.axes["Y"][0]] = ds[ds.cf.axes["Y"][0]].mean().values else: if region["method"] == "gridpoint": - if len(region["lon"] != 1): + if len(region["lon"]) != 1: raise ValueError( "Only a single location should be used with interp_centroid." ) @@ -1034,18 +928,17 @@ def spatial_mean( # noqa: C901 @parse_config def produce_horizon( # noqa: C901 ds: xr.Dataset, - indicators: Union[ - str, - os.PathLike, - Sequence[Indicator], - Sequence[tuple[str, Indicator]], - ModuleType, - ], + indicators: ( + str + | os.PathLike + | Sequence[Indicator] + | Sequence[tuple[str, Indicator]] + | ModuleType + ), *, - periods: Optional[Union[list[str], list[list[str]]]] = None, - warminglevels: Optional[dict] = None, - to_level: Optional[str] = "horizons", - period: Optional[list] = None, + periods: list[str] | list[list[str]] | None = None, + warminglevels: dict | None = None, + to_level: str | None = "horizons", ) -> xr.Dataset: """ Compute indicators, then the climatological mean, and finally unstack dates in order @@ -1084,18 +977,12 @@ def produce_horizon( # noqa: C901 "If you want to use produce_horizon for multiple warming levels, " "extract the full time series and use the `warminglevels` argument instead." ) - if period is not None: - warnings.warn( - "The 'period' argument is deprecated and will be removed in a future version. 
Use 'periods' instead.", - category=FutureWarning, - ) - periods = [standardize_periods(period, multiple=False)] all_periods = [] if periods is not None: all_periods.extend(standardize_periods(periods)) if warminglevels is not None: - if isinstance(warminglevels["wl"], (int, float)): + if isinstance(warminglevels["wl"], int | float): all_periods.append(warminglevels) elif isinstance(warminglevels["wl"], list): template = deepcopy(warminglevels) diff --git a/xscen/biasadjust.py b/src/xscen/biasadjust.py similarity index 89% rename from xscen/biasadjust.py rename to src/xscen/biasadjust.py index 63306489..0df7bd4f 100644 --- a/xscen/biasadjust.py +++ b/src/xscen/biasadjust.py @@ -2,12 +2,10 @@ import logging from copy import deepcopy -from typing import Optional, Union import xarray as xr import xclim as xc from xclim import sdba -from xclim.core.calendar import convert_calendar, get_calendar from .catutils import parse_from_ds from .config import parse_config @@ -58,17 +56,17 @@ def _add_preprocessing_attr(scen, train_kwargs): def train( dref: xr.Dataset, dhist: xr.Dataset, - var: Union[str, list[str]], + var: str | list[str], period: list[str], *, method: str = "DetrendedQuantileMapping", - group: Optional[Union[sdba.Grouper, str, dict]] = None, - xclim_train_args: Optional[dict] = None, + group: sdba.Grouper | str | dict | None = None, + xclim_train_args: dict | None = None, maximal_calendar: str = "noleap", - adapt_freq: Optional[dict] = None, - jitter_under: Optional[dict] = None, - jitter_over: Optional[dict] = None, - align_on: Optional[str] = "year", + adapt_freq: dict | None = None, + jitter_under: dict | None = None, + jitter_over: dict | None = None, + align_on: str | None = "year", ) -> xr.Dataset: """ Train a bias-adjustment. @@ -100,7 +98,7 @@ def train( jitter_over: dict, optional If given, a dictionary of args to pass to `jitter_over_thresh`. align_on: str, optional - `align_on` argument for the function `xclim.core.calendar.convert_calendar`. + `align_on` argument for the function `xr.DataArray.convert_calendar`. Returns ------- @@ -137,13 +135,13 @@ def train( ref = ref.sel(time=slice(period[0], period[1])) # convert calendar if necessary - simcal = get_calendar(hist) - refcal = get_calendar(ref) + simcal = hist.time.dt.calendar + refcal = ref.time.dt.calendar mincal = minimum_calendar(simcal, maximal_calendar) if simcal != mincal: - hist = convert_calendar(hist, mincal, align_on=align_on) + hist = hist.convert_calendar(mincal, align_on=align_on) if refcal != mincal: - ref = convert_calendar(ref, mincal, align_on=align_on) + ref = ref.convert_calendar(mincal, align_on=align_on) if group: if isinstance(group, dict): @@ -194,13 +192,13 @@ def train( def adjust( dtrain: xr.Dataset, dsim: xr.Dataset, - periods: Union[list[str], list[list[str]]], + periods: list[str] | list[list[str]], *, - xclim_adjust_args: Optional[dict] = None, + xclim_adjust_args: dict | None = None, to_level: str = "biasadjusted", - bias_adjust_institution: Optional[str] = None, - bias_adjust_project: Optional[str] = None, - align_on: Optional[str] = "year", + bias_adjust_institution: str | None = None, + bias_adjust_project: str | None = None, + align_on: str | None = "year", ) -> xr.Dataset: """ Adjust a simulation. @@ -223,7 +221,7 @@ def adjust( bias_adjust_project : str, optional The project to assign to the output. align_on: str, optional - `align_on` argument for the fonction `xclim.core.calendar.convert_calendar`. + `align_on` argument for the function `xr.DataArray.convert_calendar`. 
Returns ------- @@ -240,7 +238,8 @@ def adjust( # evaluate the dict that was stored as a string if not isinstance(dtrain.attrs["train_params"], dict): - dtrain.attrs["train_params"] = eval(dtrain.attrs["train_params"]) + # FIXME: eval is bad. There has to be a better way!™ + dtrain.attrs["train_params"] = eval(dtrain.attrs["train_params"]) # noqa: S307 var = dtrain.attrs["train_params"]["var"] if len(var) != 1: @@ -252,10 +251,10 @@ def adjust( sim = dsim[var] # get right calendar - simcal = get_calendar(sim) + simcal = sim.time.dt.calendar mincal = minimum_calendar(simcal, dtrain.attrs["train_params"]["maximal_calendar"]) if simcal != mincal: - sim = convert_calendar(sim, mincal, align_on=align_on) + sim = sim.convert_calendar(mincal, align_on=align_on) # adjust ADJ = sdba.adjustment.TrainAdjust.from_dataset(dtrain) diff --git a/xscen/catalog.py b/src/xscen/catalog.py similarity index 80% rename from xscen/catalog.py rename to src/xscen/catalog.py index 601ddcd7..08690228 100644 --- a/xscen/catalog.py +++ b/src/xscen/catalog.py @@ -6,13 +6,13 @@ import logging import os import re -import warnings +import shutil as sh from collections.abc import Mapping, Sequence from copy import deepcopy from functools import reduce from operator import or_ from pathlib import Path -from typing import Any, Optional, Union +from typing import Any import fsspec as fs import intake_esm @@ -37,8 +37,8 @@ __all__ = [ "COLUMNS", - "DataCatalog", "ID_COLUMNS", + "DataCatalog", "ProjectCatalog", "concat_data_catalogs", "generate_id", @@ -198,10 +198,10 @@ def __init__( @classmethod def from_df( cls, - data: Union[pd.DataFrame, os.PathLike, Sequence[os.PathLike]], - esmdata: Optional[Union[os.PathLike, dict]] = None, + data: pd.DataFrame | os.PathLike | Sequence[os.PathLike], + esmdata: os.PathLike | dict | None = None, *, - read_csv_kwargs: Optional[Mapping[str, Any]] = None, + read_csv_kwargs: Mapping[str, Any] | None = None, name: str = "virtual", **intake_kwargs, ): @@ -236,7 +236,7 @@ def from_df( ).reset_index(drop=True) if isinstance(esmdata, os.PathLike): - with open(esmdata) as f: + with Path(esmdata).open(encoding="utf-8") as f: esmdata = json.load(f) elif esmdata is None: esmdata = deepcopy(esm_col_data) @@ -263,7 +263,7 @@ def _find_unique(series): else: return data.apply(_find_unique, result_type="reduce").to_dict() - def unique(self, columns: Optional[Union[str, Sequence[str]]] = None): + def unique(self, columns: str | Sequence[str] | None = None): """Return a series of unique values in the catalog. Parameters @@ -309,7 +309,7 @@ def search(self, **columns): ) return cat - def drop_duplicates(self, columns: Optional[list[str]] = None): + def drop_duplicates(self, columns: list[str] | None = None): """Drop duplicates in the catalog based on a subset of columns. Parameters @@ -356,9 +356,8 @@ def check_existing(row): path = Path(row.path) exists = (path.is_dir() and path.suffix == ".zarr") or (path.is_file()) if not exists: - logger.info( - f"File {path} was not found on disk, removing from catalog." - ) + msg = f"File {path} was not found on disk, removing from catalog." 
+ logger.info(msg) return exists # In case variables were deleted manually in a Zarr, double-check that they still exist @@ -384,14 +383,13 @@ def check_variables(row): if len_df > 0: self.esmcat._df["variable"] = self.df.apply(check_variables, axis=1) - def exists_in_cat(self, verbose=True, **columns) -> bool: + def exists_in_cat(self, **columns) -> bool: """ Check if there is an entry in the catalogue corresponding to the arguments given. Parameters ---------- columns: Arguments that will be given to `catalog.search` - verbose: Log the result of the search. Returns ------- @@ -399,16 +397,17 @@ def exists_in_cat(self, verbose=True, **columns) -> bool: True if there is an entry in the catalogue corresponding to the arguments given. """ exists = bool(len(self.search(**columns))) - if exists and verbose: - logger.info(f"An entry exists for: {columns}") + if exists: + msg = f"An entry exists for: {columns}" + logger.info(msg) return exists def to_dataset( self, - concat_on: Optional[Union[list[str], str]] = None, - create_ensemble_on: Optional[Union[list[str], str]] = None, - ensemble_name: Optional[Union[list[str]]] = None, - calendar: Optional[str] = "standard", + concat_on: list[str] | str | None = None, + create_ensemble_on: list[str] | str | None = None, + ensemble_name: list[str] | None = None, + calendar: str | None = "standard", **kwargs, ) -> xr.Dataset: """ @@ -437,12 +436,12 @@ def to_dataset( If None, this will be the same as `create_ensemble_on`. The resulting coordinate must be unique. calendar : str, optional - If `create_ensemble_on` is given, all datasets are converted to this calendar before concatenation. - Ignored otherwise (default). If None, no conversion is done. - `align_on` is always "date". + If `create_ensemble_on` is given but not `preprocess`, all datasets are converted to this calendar before concatenation. + Ignored otherwise (default). If None, no conversion is done. `align_on` is always "date". + If `preprocess` is given, it must do the needed calendar handling. kwargs: Any other arguments are passed to :py:meth:`~intake_esm.core.esm_datastore.to_dataset_dict`. - The `preprocess` argument cannot be used if `create_ensemble_on` is given. + The `preprocess` argument must convert calendars as needed if `create_ensemble_on` is given. Returns ------- @@ -494,10 +493,6 @@ def to_dataset( ) if create_ensemble_on: - if kwargs.get("preprocess") is not None: - warnings.warn( - "Using `create_ensemble_on` will override the given `preprocess` function." 
- ) cat.df["realization"] = generate_id(cat.df, ensemble_name) cat.esmcat.aggregation_control.aggregations.append( intake_esm.cat.Aggregation( @@ -507,15 +502,19 @@ def to_dataset( ) xrfreq = cat.df["xrfreq"].unique()[0] - def preprocess(ds): - ds = ensure_correct_time(ds, xrfreq) - if calendar is not None: - ds = ds.convert_calendar( - calendar, use_cftime=(calendar != "default"), align_on="date" - ) - return ds + if kwargs.get("preprocess") is None: + + def preprocess(ds): + ds = ensure_correct_time(ds, xrfreq) + if calendar is not None: + ds = ds.convert_calendar( + calendar, + use_cftime=(calendar != "default"), + align_on="date", + ) + return ds - kwargs["preprocess"] = preprocess + kwargs["preprocess"] = preprocess if len(rm_from_id) > 1: # Guess what the ID was and rebuild a new one, omitting the columns part of the aggregation @@ -537,6 +536,94 @@ def preprocess(ds): ds = cat.to_dask(**kwargs) return ds + def copy_files( + self, + dest: str | os.PathLike, + flat: bool = True, + unzip: bool = False, + zipzarr: bool = False, + inplace: bool = False, + ): + """Copy each file of the catalog to another location, unzipping datasets along the way if requested. + + Parameters + ---------- + cat: DataCatalog or ProjectCatalog + A catalog to copy. + dest: str, path + The root directory of the destination. + flat: bool + If True (default), all dataset files are copied in the same directory. + Renaming with an integer suffix ("{name}_01.{ext}") is done in case of duplicate file names. + If False, :py:func:`xscen.catutils.build_path` (with default arguments) is used to generated the new path below the destination. + Nothing is done in case of duplicates in that case. + unzip: bool + If True, any datasets with a `.zip` suffix are unzipped during the copy (or rather instead of a copy). + zipzarr: bool + If True, any datasets with a `.zarr` suffix are zipped during the copy (or rather instead of a copy). + inplace : bool + If True, the catalog is updated in place. If False (default), a copy is returned. + + Returns + ------- + If inplace is False, this returns a catalog similar to self except with updated filenames. Some special attributes are not preserved, + such as those added by :py:func:`xscen.extract.search_data_catalogs`. In this case, use `inplace=True`. + """ + # Local imports to avoid circular imports + from .catutils import build_path + from .io import unzip_directory, zip_directory + + dest = Path(dest) + data = self.esmcat._df.copy() + if flat: + new_paths = [] + for path in map(Path, data.path.values): + if unzip and path.suffix == ".zip": + new = dest / path.with_suffix("").name + elif zipzarr and path.suffix == ".zarr": + new = dest / path.with_suffix(".zarr.zip").name + else: + new = dest / path.name + if new in new_paths: + suffixes = "".join(new.suffixes) + name = new.name.removesuffix(suffixes) + i = 1 + while new in new_paths: + new = dest / (name + f"_{i:02d}" + suffixes) + i += 1 + new_paths.append(new) + data["new_path"] = new_paths + else: + data = build_path(data, root=dest).drop(columns=["new_path_type"]) + + msg = f"Will copy {len(data)} files." + logger.debug(msg) + for i, row in data.iterrows(): + old = Path(row.path) + new = Path(row.new_path) + if unzip and old.suffix == ".zip": + msg = f"Unzipping {old} to {new}." + logger.info(msg) + unzip_directory(old, new) + elif zipzarr and old.suffix == ".zarr": + msg = f"Zipping {old} to {new}." + logger.info(msg) + zip_directory(old, new) + elif old.is_dir(): + msg = f"Copying directory tree {old} to {new}." 
+ logger.info(msg) + sh.copytree(old, new) + else: + msg = f"Copying file {old} to {new}." + logger.info(msg) + sh.copy(old, new) + if inplace: + self.esmcat._df["path"] = data["new_path"] + return + data["path"] = data["new_path"] + data = data.drop(columns=["new_path"]) + return self.__class__({"esmcat": self.esmcat.dict(), "df": data}) + class ProjectCatalog(DataCatalog): """A DataCatalog with additional 'write' functionalities that can update and upload itself. @@ -549,9 +636,9 @@ class ProjectCatalog(DataCatalog): @classmethod def create( cls, - filename: Union[os.PathLike, str], + filename: os.PathLike | str, *, - project: Optional[dict] = None, + project: dict | None = None, overwrite: bool = False, ): r"""Create a new project catalog from some project metadata. @@ -561,26 +648,26 @@ def create( Parameters ---------- filename : os.PathLike or str - A path to the json file (with or without suffix). + A path to the json file (with or without suffix). project : dict, optional - Metadata to create the catalog. If None, `CONFIG['project']` will be used. - Valid fields are: + Metadata to create the catalog. If None, `CONFIG['project']` will be used. + Valid fields are: - - title : Name of the project, given as the catalog's "title". - - id : slug-like version of the name, given as the catalog's id (should be url-proof) - Defaults to a modified name. - - version : Version of the project (and thus the catalog), string like "x.y.z". - - description : Detailed description of the project, given to the catalog's "description". - - Any other entry defined in :py:data:`esm_col_data`. + - title : Name of the project, given as the catalog's "title". + - id : slug-like version of the name, given as the catalog's id (should be url-proof) + Defaults to a modified name. + - version : Version of the project (and thus the catalog), string like "x.y.z". + - description : Detailed description of the project, given to the catalog's "description". + - Any other entry defined in :py:data:`esm_col_data`. - At least one of `id` and `title` must be given, the rest is optional. + At least one of `id` and `title` must be given, the rest is optional. overwrite : bool - If True, will overwrite any existing JSON and CSV file. + If True, will overwrite any existing JSON and CSV file. Returns ------- ProjectCatalog - An empty intake_esm catalog. + An empty intake_esm catalog. """ path = Path(filename) meta_path = path.with_suffix(".json") @@ -619,24 +706,25 @@ def create( ) # Change catalog_file to a relative path - with open(meta_path) as f: + with Path(meta_path).open(encoding="utf-8") as f: meta = json.load(f) meta["catalog_file"] = data_path.name - with open(meta_path, "w") as f: + with Path(meta_path).open("w", encoding="utf-8") as f: json.dump(meta, f, indent=2) return cls(str(meta_path)) def __init__( self, - df: Union[str, dict], + df: str | dict, *args, create: bool = False, overwrite: bool = False, - project: Optional[dict] = None, + project: dict | None = None, **kwargs, ): - """Open or create a project catalog. + """ + Open or create a project catalog. Parameters ---------- @@ -658,9 +746,7 @@ def __init__( The ‘df’ key must be a Pandas DataFrame containing content that would otherwise be in the CSV file. 
""" if create: - if isinstance(df, (str, Path)) and ( - not os.path.isfile(Path(df)) or overwrite - ): + if isinstance(df, str | Path) and (not Path(df).is_file() or overwrite): self.create(df, project=project, overwrite=overwrite) super().__init__(df, *args, **kwargs) self.check_valid() @@ -670,15 +756,13 @@ def __init__( # TODO: Implement a way to easily destroy part of the catalog to "reset" some steps def update( self, - df: Optional[ - Union[ - DataCatalog, - intake_esm.esm_datastore, - pd.DataFrame, - pd.Series, - Sequence[pd.Series], - ] - ] = None, + df: None | ( + DataCatalog + | intake_esm.esm_datastore + | pd.DataFrame + | pd.Series + | Sequence[pd.Series] + ) = None, ): """Update the catalog with new data and writes the new data to the csv file. @@ -760,8 +844,8 @@ def update( def update_from_ds( self, ds: xr.Dataset, - path: Union[os.PathLike, str], - info_dict: Optional[dict] = None, + path: os.PathLike | str, + info_dict: dict | None = None, **info_kwargs, ): """Update the catalog with new data and writes the new data to the csv file. @@ -798,7 +882,7 @@ def update_from_ds( if info_kwargs: d.update(info_kwargs) - if "time" in ds: + if "time" in ds.dims: d["date_start"] = str( ds.isel(time=0).time.dt.strftime("%4Y-%m-%d %H:%M:%S").values ) @@ -813,9 +897,8 @@ def update_from_ds( if "format" not in d: d["format"] = Path(d["path"]).suffix.split(".")[1] - logger.info( - f"File format not specified. Adding it as '{d['format']}' based on file name." - ) + msg = f"File format not specified. Adding it as '{d['format']}' based on file name." + logger.info(msg) self.update(pd.Series(d)) @@ -857,17 +940,20 @@ def concat_data_catalogs(*dcs): registry.update(dc.derivedcat._registry) catalogs.append(dc.df) requested_variables.extend(dc._requested_variables) - requested_variables_true.extend(dc._requested_variables_true) - dependent_variables.extend(dc._dependent_variables) - requested_variable_freqs.extend(dc._requested_variable_freqs) + requested_variables_true.extend(getattr(dc, "_requested_variables_true", [])) + dependent_variables.extend(getattr(dc, "_dependent_variables", [])) + requested_variable_freqs.extend(getattr(dc, "_requested_variable_freqs", [])) df = pd.concat(catalogs, axis=0).drop_duplicates(ignore_index=True) dvr = intake_esm.DerivedVariableRegistry() dvr._registry.update(registry) newcat = DataCatalog({"esmcat": dcs[0].esmcat.dict(), "df": df}, registry=dvr) newcat._requested_variables = requested_variables - newcat._requested_variables_true = requested_variables_true - newcat._dependent_variables = dependent_variables - newcat._requested_variable_freqs = requested_variable_freqs + if requested_variables_true: + newcat._requested_variables_true = requested_variables_true + if dependent_variables: + newcat._dependent_variables = dependent_variables + if requested_variable_freqs: + newcat._requested_variable_freqs = requested_variable_freqs return newcat @@ -877,7 +963,7 @@ def _build_id(element: pd.Series, columns: list[str]): def generate_id( - df: Union[pd.DataFrame, xr.Dataset], id_columns: Optional[list] = None + df: pd.DataFrame | xr.Dataset, id_columns: list | None = None ) -> pd.Series: """Create an ID from column entries. @@ -908,7 +994,7 @@ def generate_id( return df.apply(_build_id, axis=1, args=(id_columns,)) -def unstack_id(df: Union[pd.DataFrame, ProjectCatalog, DataCatalog]) -> dict: +def unstack_id(df: pd.DataFrame | ProjectCatalog | DataCatalog) -> dict: """Reverse-engineer an ID using catalog entries. 
Parameters @@ -921,7 +1007,7 @@ def unstack_id(df: Union[pd.DataFrame, ProjectCatalog, DataCatalog]) -> dict: dict Dictionary with one entry per unique ID, which are themselves dictionaries of all the individual parts of the ID. """ - if isinstance(df, (ProjectCatalog, DataCatalog)): + if isinstance(df, ProjectCatalog | DataCatalog): df = df.df out = {} @@ -933,7 +1019,7 @@ def unstack_id(df: Union[pd.DataFrame, ProjectCatalog, DataCatalog]) -> dict: [ col for col in subset.columns - if bool(re.search(f"((_)|(^)){str(subset[col].iloc[0])}((_)|($))", ids)) + if bool(re.search(f"((_)|(^)){subset[col].iloc[0]!s}((_)|($))", ids)) ] ].drop("id", axis=1) @@ -950,7 +1036,7 @@ def unstack_id(df: Union[pd.DataFrame, ProjectCatalog, DataCatalog]) -> dict: def subset_file_coverage( df: pd.DataFrame, - periods: Union[list[str], list[list[str]]], + periods: list[str] | list[list[str]], *, coverage: float = 0.99, duplicates_ok: bool = False, @@ -985,9 +1071,8 @@ def subset_file_coverage( # Check for duplicated Intervals if duplicates_ok is False and intervals.is_overlapping: - logging.warning( - f"{df['id'].iloc[0] + ': ' if 'id' in df.columns else ''}Time periods are overlapping." - ) + msg = f"{df['id'].iloc[0] + ': ' if 'id' in df.columns else ''}Time periods are overlapping." + logging.warning(msg) return pd.DataFrame(columns=df.columns) # Create an array of True/False @@ -1001,9 +1086,8 @@ def subset_file_coverage( files_in_range = intervals.overlaps(period_interval) if not files_in_range.any(): - logging.warning( - f"{df['id'].iloc[0] + ': ' if 'id' in df.columns else ''}Insufficient coverage (no files in range {period})." - ) + msg = f"{df['id'].iloc[0] + ': ' if 'id' in df.columns else ''}Insufficient coverage (no files in range {period})." + logging.warning(msg) return pd.DataFrame(columns=df.columns) # Very rough guess of the coverage relative to the requested period, @@ -1022,10 +1106,11 @@ def subset_file_coverage( ).length.sum() if guessed_length / period_length < coverage: - logging.warning( + msg = ( f"{df['id'].iloc[0] + ': ' if 'id' in df.columns else ''}Insufficient coverage " f"(guessed at {guessed_length / period_length:.1%})." ) + logging.warning(msg) return pd.DataFrame(columns=df.columns) files_to_keep.append(files_in_range) diff --git a/xscen/catutils.py b/src/xscen/catutils.py similarity index 83% rename from xscen/catutils.py rename to src/xscen/catutils.py index 46fc4145..44c8f2b3 100644 --- a/xscen/catutils.py +++ b/src/xscen/catutils.py @@ -12,9 +12,10 @@ from copy import deepcopy from fnmatch import fnmatch from functools import partial, reduce +from itertools import chain, combinations, product from multiprocessing import Pool from pathlib import Path -from typing import Any, Optional, Union +from typing import Any import cftime import netCDF4 @@ -35,10 +36,28 @@ logger = logging.getLogger(__name__) -__all__ = ["build_path", "parse_directory", "parse_from_ds", "register_parse_type"] +__all__ = [ + "build_path", + "parse_directory", + "parse_from_ds", + "patterns_from_schema", + "register_parse_type", +] # ## File finding and path parsing ## # +SUFFIX_TO_FORMAT = { + ".nc": "nc", + ".nc4": "nc", + ".zip": "zarr", + ".zarr.zip": "zarr", + ".zarr": "zarr", +} +"""Mapping from file suffix to format. + +This is used to populate the "format" esm catalog column from the parsed path. +""" + EXTRA_PARSE_TYPES = {} """Extra parse types to add to parse's default. 
@@ -91,7 +110,7 @@ def _parse_level(text: str) -> str: ) def _parse_datebounds( text: str, -) -> Union[list[str], tuple[None, None], tuple[str, str]]: +) -> list[str] | tuple[None, None] | tuple[str, str]: """Parse helper to translate date bounds, used in the special DATES field.""" if "-" in text: return text.split("-") @@ -101,22 +120,22 @@ def _parse_datebounds( def _find_assets( - root: Union[str, os.PathLike], + root: str | os.PathLike, exts: set[str], lengths: set[int], - dirglob: Optional[str] = None, + dirglob: str | None = None, ): """Walk recursively over files in a directory, filtering according to a glob pattern, path depth and extensions. Parameters ---------- - root: str or Pathlike + root : str or Pathlike Path of the directory to walk through. - exts: set of strings + exts : set of strings Set of file extensions to look for. - lengths: set of ints + lengths : set of ints Set of path depths to look for. - dirglob: str, optional + dirglob : str, optional A glob pattern. If given, only parent folders matching this pattern are walked through. This pattern can not include the asset's basename. """ @@ -140,11 +159,11 @@ def _find_assets( if ".zarr" in exts: for zr in zarrs: - yield os.path.join(top, zr) + yield Path(top).joinpath(zr).as_posix() if exts - {".zarr"}: # There are more exts than for file in files: - if os.path.splitext(file)[-1] in exts: - yield os.path.join(top, file) + if Path(file).suffix in exts: + yield Path(top).joinpath(file).as_posix() def _compile_pattern(pattern: str) -> parse.Parser: @@ -172,13 +191,13 @@ def _compile_pattern(pattern: str) -> parse.Parser: def _name_parser( - path: Union[os.PathLike, str], - root: Union[os.PathLike, str], - patterns: list[Union[str, parse.Parser]], - read_from_file: Optional[Union[list[str], dict]] = None, - attrs_map: Optional[dict] = None, - xr_open_kwargs: Optional[dict] = None, -) -> Optional[dict]: + path: os.PathLike | str, + root: os.PathLike | str, + patterns: list[str | parse.Parser], + read_from_file: list[str] | dict | None = None, + attrs_map: dict | None = None, + xr_open_kwargs: dict | None = None, +) -> dict | None: """Extract metadata information from the file path. Parameters @@ -223,7 +242,7 @@ def _name_parser( return None d["path"] = abs_path - d["format"] = path.suffix[1:] + d["format"] = SUFFIX_TO_FORMAT.get(path.suffix, path.suffix[1:]) if "DATES" in d: d["date_start"], d["date_end"] = d.pop("DATES") @@ -248,26 +267,26 @@ def _name_parser( def _parse_dir( # noqa: C901 - root: Union[os.PathLike, str], + root: os.PathLike | str, patterns: list[str], - dirglob: Optional[str] = None, - checks: Optional[list[str]] = None, - read_from_file: Optional[Union[list[str], dict]] = None, - attrs_map: Optional[dict] = None, - xr_open_kwargs: Optional[dict] = None, + dirglob: str | None = None, + checks: list[str] | None = None, + read_from_file: list[str] | dict | None = None, + attrs_map: dict | None = None, + xr_open_kwargs: dict | None = None, progress: bool = False, ): """Iterate and parses files in a directory, filtering according to basic pattern properties and optional checks. Parameters ---------- - root: os.PathLike or str + root : os.PathLike or str Path to walk through. - patterns: list of strings or compiled parsers + patterns : list of strings or compiled parsers Patterns that the files will be checked against. The extensions of the patterns are extracted and only paths with these are returned. Also, the depths of the patterns are calculated and only paths of this depth under the root are returned. 
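The patterns are plain templates for the `parse` package, and the walk is pruned using only two things derived from each pattern: its directory depth and its file suffix. A small, hypothetical pattern illustrates both, plus the facet extraction itself (the real code registers a custom parse type for the DATES field; here it is matched as a plain string):

```python
from pathlib import Path

import parse

pattern = "{type}/{institution}/{source}/{frequency}/{variable}_{DATES}.nc"

depth = pattern.count("/")   # only paths this deep under the root are considered
ext = Path(pattern).suffix   # ".nc"

result = parse.parse(pattern, "simulation/CCCma/CanESM5/day/tas_19500101-20141231.nc")
print(depth, ext)
print(result.named if result else None)
# {'type': 'simulation', 'institution': 'CCCma', 'source': 'CanESM5',
#  'frequency': 'day', 'variable': 'tas', 'DATES': '19500101-20141231'}
```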
- dirglob: str + dirglob : str A glob pattern. If given, only parent folders matching this pattern are walked through. This pattern can not include the asset's basename. checks: list of strings, optional @@ -283,7 +302,7 @@ def _parse_dir( # noqa: C901 If `read_from_file` is not None, passed directly to :py:func:`parse_from_ds`. xr_open_kwargs : dict, optional If `read_from_file` is not None, passed directly to :py:func:`parse_from_ds`. - progress: bool + progress : bool If True, the number of found files is printed to stdout. Return @@ -292,7 +311,7 @@ def _parse_dir( # noqa: C901 Metadata parsed from each found asset. """ lengths = {patt.count(os.path.sep) for patt in patterns} - exts = {os.path.splitext(patt)[-1] for patt in patterns} + exts = {Path(patt).suffix for patt in patterns} comp_patterns = list(map(_compile_pattern, patterns)) checks = checks or [] @@ -301,7 +320,7 @@ def _parse_dir( # noqa: C901 # Another thread runs the checks # Another thread parses the path and file. # In theory, for a local disk, walking a directory cannot be parallelized. This is not as true for network-mounted drives. - # Thus we parallelize the parsing steps. + # Thus, we parallelize the parsing steps. # If the name-parsing step becomes blocking, we could try to increase the number of threads (but netCDF4 can't multithread...) # Usually, the walking is the bottleneck. q_found = queue.Queue() @@ -324,7 +343,8 @@ def check_worker(): # TODO: testing for zarr validity is not implemented with netCDF4.Dataset(path): pass - except Exception: + # FIXME: This is a catch-all, we should catch the specific exception raised by netCDF4. + except Exception: # noqa: BLE001 valid = False if valid: q_checked.put(path) @@ -343,8 +363,10 @@ def parse_worker(): attrs_map=attrs_map, xr_open_kwargs=xr_open_kwargs, ) - except Exception as err: - logger.error(f"Parsing file {path} failed with {err}.") + # FIXME: This is not specific enough, we should catch the specific exception raised by _name_parser. + except Exception as err: # noqa: BLE001 + msg = f"Parsing file {path} failed with {err}." + logger.error(msg) else: if d is not None: parsed.append(d) @@ -355,7 +377,8 @@ def parse_worker(): ): print(f"Found {n:7d} files", end="\r") else: - logger.debug(f"File {path} didn't match any pattern.") + msg = f"File {path} didn't match any pattern." + logger.debug(msg) q_checked.task_done() CW = threading.Thread(target=check_worker, daemon=True) @@ -390,7 +413,7 @@ def _replace_in_row(oldrow: pd.Series, replacements: dict): List-like fields are handled. """ row = oldrow.copy() - list_cols = [col for col in oldrow.index if isinstance(oldrow[col], (tuple, list))] + list_cols = [col for col in oldrow.index if isinstance(oldrow[col], tuple | list)] for col, reps in replacements.items(): if col not in row: continue @@ -418,7 +441,8 @@ def _parse_first_ds( """Parse attributes from one file per group, apply them to the whole group.""" fromfile = parse_from_ds(grp.path.iloc[0], cols, attrs_map, **xr_open_kwargs) - logger.info(f"Got {len(fromfile)} fields, applying to {len(grp)} entries.") + msg = f"Got {len(fromfile)} fields, applying to {len(grp)} entries." + logger.info(msg) out = grp.copy() for col, val in fromfile.items(): for i in grp.index: # If val is an iterable we can't use loc. 
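The "readable" check performed in the checking thread is simply an attempt to open the file. A standalone version of that idea (the real worker catches a broader set of exceptions, as the FIXME notes, and Zarr validity is not yet implemented):

```python
import netCDF4


def is_readable_nc(path: str) -> bool:
    # Try to open the file with netCDF4; any failure marks the asset as invalid.
    try:
        with netCDF4.Dataset(path):
            return True
    except OSError:
        return False
```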
@@ -428,24 +452,24 @@ def _parse_first_ds( @parse_config def parse_directory( # noqa: C901 - directories: list[Union[str, os.PathLike]], + directories: str | list[str | os.PathLike], patterns: list[str], *, - id_columns: Optional[list[str]] = None, - read_from_file: Union[ - bool, - Sequence[str], - tuple[Sequence[str], Sequence[str]], - Sequence[tuple[Sequence[str], Sequence[str]]], - ] = False, - homogenous_info: Optional[dict] = None, - cvs: Optional[Union[str, os.PathLike, dict]] = None, - dirglob: Optional[str] = None, - xr_open_kwargs: Optional[Mapping[str, Any]] = None, + id_columns: list[str] | None = None, + read_from_file: ( + bool + | Sequence[str] + | tuple[Sequence[str], Sequence[str]] + | Sequence[tuple[Sequence[str], Sequence[str]]] + ) = False, + homogenous_info: dict | None = None, + cvs: str | os.PathLike | dict | None = None, + dirglob: str | None = None, + xr_open_kwargs: Mapping[str, Any] | None = None, only_official_columns: bool = True, progress: bool = False, - parallel_dirs: Union[bool, int] = False, - file_checks: Optional[list[str]] = None, + parallel_dirs: bool | int = False, + file_checks: list[str] | None = None, ) -> pd.DataFrame: r"""Parse files in a directory and return them as a pd.DataFrame. @@ -531,6 +555,8 @@ def parse_directory( # noqa: C901 pd.DataFrame Parsed directory files """ + if isinstance(directories, str | Path): + directories = [directories] homogenous_info = homogenous_info or {} xr_open_kwargs = xr_open_kwargs or {} if only_official_columns: @@ -562,7 +588,7 @@ def parse_directory( # noqa: C901 if cvs is not None: if not isinstance(cvs, dict): - with open(cvs) as f: + with Path(cvs).open(encoding="utf-8") as f: cvs = yaml.safe_load(f) attrs_map = cvs.pop("attributes", {}) else: @@ -596,8 +622,9 @@ def parse_directory( # noqa: C901 raise ValueError("No files found.") else: if progress: - print() - logger.info(f"Found and parsed {len(parsed)} files.") + print() # This is because of the \r outputted in the _parse_dir call. + msg = f"Found and parsed {len(parsed)} files." + logger.info(msg) # Path has become NaN when some paths didn't fit any passed pattern df = pd.DataFrame(parsed).dropna(axis=0, subset=["path"]) @@ -670,7 +697,8 @@ def parse_directory( # noqa: C901 warnings.warn( f"{n} invalid entries where the start and end dates are Null but the frequency is not 'fx'." ) - logger.debug(f"Paths: {df.path[invalid].values}") + msg = f"Paths: {df.path[invalid].values}" + logger.debug(msg) df = df[~invalid] # Exact opposite invalid = df.date_start.notnull() & df.date_end.notnull() & (df.xrfreq == "fx") @@ -679,7 +707,8 @@ def parse_directory( # noqa: C901 warnings.warn( f"{n} invalid entries where the start and end dates are given but the frequency is 'fx'." ) - logger.debug(f"Paths: {df.path[invalid].values}") + msg = f"Paths: {df.path[invalid].values}" + logger.debug(msg) df = df[~invalid] # Create id from user specifications @@ -697,9 +726,9 @@ def parse_directory( # noqa: C901 def parse_from_ds( # noqa: C901 - obj: Union[str, os.PathLike, xr.Dataset], + obj: str | os.PathLike | xr.Dataset, names: Sequence[str], - attrs_map: Optional[Mapping[str, str]] = None, + attrs_map: Mapping[str, str] | None = None, **xrkwargs, ): """Parse a list of catalog fields from the file/dataset itself. @@ -732,18 +761,21 @@ def parse_from_ds( # noqa: C901 obj = Path(obj) if isinstance(obj, Path) and obj.suffixes[-1] == ".zarr": - logger.info(f"Parsing attributes from Zarr {obj}.") + msg = f"Parsing attributes from Zarr {obj}." 
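A typical call hands `parse_directory` one or more roots plus patterns whose field names are official catalog columns. The directory layout below is hypothetical, and `read_from_file` is optional, used here to read or confirm facets from the files themselves:

```python
from xscen.catutils import parse_directory

df = parse_directory(
    directories="/data/simulations",   # a single string is now accepted as well
    patterns=[
        "{type}/{institution}/{source}/{experiment}/{member}/{frequency}/{variable}_{DATES}.nc",
    ],
    homogenous_info={"mip_era": "CMIP6"},                    # facets shared by every file
    read_from_file=["variable", "date_start", "date_end"],   # read these from the files themselves
    progress=True,
)
```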
+ logger.info(msg) ds_attrs, variables, time = _parse_from_zarr( obj, get_vars="variable" in names, get_time=get_time ) elif isinstance(obj, Path) and obj.suffixes[-1] == ".nc": - logger.info(f"Parsing attributes with netCDF4 from {obj}.") + msg = f"Parsing attributes with netCDF4 from {obj}." + logger.info(msg) ds_attrs, variables, time = _parse_from_nc( obj, get_vars="variable" in names, get_time=get_time ) else: if isinstance(obj, Path): - logger.info(f"Parsing attributes with xarray from {obj}.") + msg = f"Parsing attributes with xarray from {obj}." + logger.info(msg) obj = xr.open_dataset(obj, engine=get_engine(obj), **xrkwargs) ds_attrs = obj.attrs time = obj.indexes["time"] if "time" in obj else None @@ -780,12 +812,13 @@ def parse_from_ds( # noqa: C901 elif name in ds_attrs: attrs[name] = ds_attrs[name].strip() - logger.debug(f"Got fields {attrs.keys()} from file.") + msg = f"Got fields {attrs.keys()} from file." + logger.debug(msg) return attrs def _parse_from_zarr( - path: Union[os.PathLike, str], get_vars: bool = True, get_time: bool = True + path: os.PathLike | str, get_vars: bool = True, get_time: bool = True ): """Obtain the list of variables, the time coordinate and the list of global attributes from a zarr dataset. @@ -848,7 +881,7 @@ def _parse_from_zarr( def _parse_from_nc( - path: Union[os.PathLike, str], get_vars: bool = True, get_time: bool = True + path: os.PathLike | str, get_vars: bool = True, get_time: bool = True ): """Obtain the list of variables, the time coordinate, and the list of global attributes from a netCDF dataset, using netCDF4. @@ -901,7 +934,7 @@ def _schema_option(option: dict, facets: dict): return answer -def _schema_level(schema: Union[dict, list[str], str], facets: dict): +def _schema_level(schema: dict | list[str] | str, facets: dict): if isinstance(schema, str): if schema.startswith("(") and schema.endswith(")"): optional = True @@ -1015,7 +1048,7 @@ def _read_schemas(schemas): elif not isinstance(schemas, dict): if schemas is None: schemas = Path(__file__).parent / "data" / "file_schema.yml" - with open(schemas) as f: + with Path(schemas).open(encoding="utf-8") as f: schemas = yaml.safe_load(f) for name, schema in schemas.items(): missing_fields = {"with", "folders", "filename"} - set(schema.keys()) @@ -1027,14 +1060,14 @@ def _read_schemas(schemas): def _build_path( - data: Union[dict, xr.Dataset, xr.DataArray, pd.Series], + data: dict | xr.Dataset | xr.DataArray | pd.Series, schemas: dict, - root: Union[str, os.PathLike], + root: str | os.PathLike, get_type: bool = False, **extra_facets, -) -> Union[Path, tuple[Path, str]]: +) -> Path | tuple[Path, str]: # Get all known metadata - if isinstance(data, (xr.Dataset, xr.DataArray)): + if isinstance(data, xr.Dataset | xr.DataArray): facets = ( # Get non-attribute metadata parse_from_ds( @@ -1094,11 +1127,11 @@ def _build_path( @parse_config def build_path( - data: Union[dict, xr.Dataset, xr.DataArray, pd.Series, DataCatalog, pd.DataFrame], - schemas: Optional[Union[str, os.PathLike, dict]] = None, - root: Optional[Union[str, os.PathLike]] = None, + data: dict | xr.Dataset | xr.DataArray | pd.Series | DataCatalog | pd.DataFrame, + schemas: str | os.PathLike | dict | None = None, + root: str | os.PathLike | None = None, **extra_facets, -) -> Union[Path, DataCatalog, pd.DataFrame]: +) -> Path | DataCatalog | pd.DataFrame: r"""Parse the schema from a configuration and construct path using a dictionary of facets. 
Parameters @@ -1139,7 +1172,7 @@ def build_path( if root: root = Path(root) schemas = _read_schemas(schemas) - if isinstance(data, (esm_datastore, pd.DataFrame)): + if isinstance(data, esm_datastore | pd.DataFrame): if isinstance(data, esm_datastore): df = data.df else: @@ -1161,3 +1194,112 @@ def build_path( df["new_path_type"] = paths[1] return df return _build_path(data, schemas=schemas, root=root, get_type=False, **extra_facets) + + +def _as_template(a): + return "{" + a + "}" + + +def partial_format(template, **fmtargs): + """Format a template only partially, leaving un-formatted templates intact.""" + + class PartialFormatDict(dict): + def __missing__(self, key): + return _as_template(key) + + return template.format_map(PartialFormatDict(**fmtargs)) + + +def patterns_from_schema(schema: str | dict, exts: Sequence[str] | None = None): + """Generate all valid patterns for a given schema. + + Generated patterns are meant for use with :py:func:`parse_directory`. + This hardcodes the rule that facet can never contain a underscore ("_") except "variable". + File names are not strict except for the date bounds element which must be at the end if present. + + Parameters + ---------- + schema: dict or str + A dict with keys "with" (optional), "folders" and "filename", constructed as described + in the `xscen/data/file_schema.yml` file. + Or the name of a pattern group from that file. + exts: sequence of strings, optional + A list of file extensions to consider, with the leading dot. + Defaults to ``[".nc", ".zarr", ".zarr.zip"]``. + + Returns + ------- + list of patterns compatible with :py:func:`parse_directory`. + """ + if isinstance(schema, str): + schemas = Path(__file__).parent / "data" / "file_schema.yml" + with schemas.open(encoding="utf-8") as f: + schema = yaml.safe_load(f)[schema] + + # # Base folder patterns + + # Index of optional folder parts + opt_idx = [ + i + for i, k in enumerate(schema["folders"]) + if isinstance(k, str) and k.startswith("(") + ] + + raw_folders = [] + for skip in chain.from_iterable( + combinations(opt_idx, r) for r in range(len(opt_idx) + 1) + ): + # skip contains index of levels to skip + # we go through every possible missing levels combinations + parts = [] + for i, part in enumerate(schema["folders"]): + if i in skip: + continue + if isinstance(part, str): + if part.startswith("("): + part = part[1:-1] + parts.append(_as_template(part)) + elif isinstance(part, dict): + parts.append(part["text"]) + else: + parts.append("_".join(map(_as_template, part))) + raw_folders.append("/".join(parts)) + + # # Inject conditions + folders = raw_folders + for conditions in schema["with"]: + if "value" not in conditions: + # This means that the facet must be set. + # Not useful when parsing. Implicit with the facet in the pattern. + continue + + # Ensure a list + if isinstance(conditions["value"], str): + value = [conditions["value"]] + else: + value = conditions["value"] + + patterns = [] + for patt in folders: + for val in value: + patterns.append(partial_format(patt, **{conditions["facet"]: val})) + folders = patterns + + # # Inject parsing requirements (hardcoded :( ) + folders = [folder.replace("{variable}", "{variable:_}") for folder in folders] + + # # Filenames + if "DATES" in schema["filename"]: + if schema["filename"][-1] != "DATES": + raise ValueError( + "Reverse pattern generation is not supported for filenames with date bounds not at the end." 
+ ) + filename = "{?:_}_{DATES}" + else: + filename = "{?:_}" + + exts = exts or [".nc", ".zarr", ".zarr.zip"] + + patterns = [f"{fold}/{filename}{ext}" for fold, ext in product(folders, exts)] + + return patterns diff --git a/xscen/config.py b/src/xscen/config.py similarity index 92% rename from xscen/config.py rename to src/xscen/config.py index 36d1a021..6305bc96 100644 --- a/xscen/config.py +++ b/src/xscen/config.py @@ -131,7 +131,10 @@ def args_as_str(*args: tuple[Any, ...]) -> tuple[str, ...]: def load_config( - *elements, reset: bool = False, encoding: str = None, verbose: bool = False + *elements, + reset: bool = False, + encoding: str | None = None, + verbose: bool = False, ): """Load configuration from given files or key=value pairs. @@ -151,12 +154,12 @@ def load_config( If a directory is passed, all `.yml` files of this directory are added, in alphabetical order. If a "key=value" string, "key" is a dotted name and value will be evaluated if possible. "key=value" pairs are set last, after all files are being processed. - reset: bool - If True, the current config is erased before loading files. - encoding: str, optional + reset : bool + If True, erases the current config before loading files. + encoding : str, optional The encoding to use when reading files. verbose: bool - if True, each element triggers a INFO log line. + If True, each element triggers a INFO log line. Example ------- @@ -178,7 +181,8 @@ def load_config( key, value = element.split("=") CONFIG.update_from_list([(key, value)]) if verbose: - logger.info(f"Updated the config with {element}.") + msg = f"Updated the config with {element}." + logger.info(msg) else: file = Path(element) if file.is_dir(): @@ -191,7 +195,8 @@ def load_config( with configfile.open(encoding=encoding) as f: recursive_update(CONFIG, yaml.safe_load(f)) if verbose: - logger.info(f"Updated the config with {configfile}.") + msg = f"Updated the config with {configfile}." + logger.info(msg) for module, old in zip(EXTERNAL_MODULES, old_external): if old != CONFIG.get(module, {}): @@ -213,13 +218,14 @@ def _wrapper(*args, **kwargs): from_config = CONFIG.get(module, {}).get(func.__name__, {}) sig = inspect.signature(func) if CONFIG.get("print_it_all"): - logger.debug(f"For func {func}, found config {from_config}.") - logger.debug(f"Original kwargs : {kwargs}") + msg = f"For func {func}, found config {from_config}.\nOriginal kwargs : {kwargs}" + logger.debug(msg) for k, v in from_config.items(): if k in sig.parameters: kwargs.setdefault(k, v) if CONFIG.get("print_it_all"): - logger.debug(f"Modified kwargs : {kwargs}") + msg = f"Modified kwargs : {kwargs}" + logger.debug(msg) return func(*args, **kwargs) diff --git a/xscen/data/IPCC_annual_global_tas.nc b/src/xscen/data/IPCC_annual_global_tas.nc similarity index 87% rename from xscen/data/IPCC_annual_global_tas.nc rename to src/xscen/data/IPCC_annual_global_tas.nc index 2cf8d2b8..7dffd8cb 100644 Binary files a/xscen/data/IPCC_annual_global_tas.nc and b/src/xscen/data/IPCC_annual_global_tas.nc differ diff --git a/xscen/data/file_schema.yml b/src/xscen/data/file_schema.yml similarity index 95% rename from xscen/data/file_schema.yml rename to src/xscen/data/file_schema.yml index 62b29660..36330e5b 100644 --- a/xscen/data/file_schema.yml +++ b/src/xscen/data/file_schema.yml @@ -17,6 +17,7 @@ # - text: < value > # A fixed string # filename: # The file name schema, a list of facet names. If a facet is empty, it will be skipped. Elements will be separated by "_". 
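Two pieces of `patterns_from_schema` are worth illustrating. First, `partial_format` fills in only the facets it is given and leaves unknown placeholders untouched, which is how the schema's "with" conditions are injected one facet at a time:

```python
from xscen.catutils import partial_format

template = "{type}/{processing_level}/{source}/{variable}"
print(partial_format(template, type="simulation", processing_level="raw"))
# simulation/raw/{source}/{variable}
```

Second, the optional folder levels (the parenthesised schema entries) are expanded as a powerset over their indices, one pattern per combination of omitted levels. A stripped-down sketch of that loop with a made-up folder list:

```python
from itertools import chain, combinations

folders = ["type", "domain", "(member)", "frequency", "variable"]
opt_idx = [i for i, part in enumerate(folders) if part.startswith("(")]

patterns = []
for skip in chain.from_iterable(combinations(opt_idx, r) for r in range(len(opt_idx) + 1)):
    # `skip` holds the indices of the optional levels left out of this variant.
    parts = [part.strip("()") for i, part in enumerate(folders) if i not in skip]
    patterns.append("/".join("{" + part + "}" for part in parts))

print(patterns)
# ['{type}/{domain}/{member}/{frequency}/{variable}',
#  '{type}/{domain}/{frequency}/{variable}']
```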
# # The special "DATES" facet will be replaced by the most concise way found to define the temporal range covered by the file. +# # DATES should only appear at the end. --- ### Original / raw data # @@ -25,14 +26,14 @@ original-non-sims: with: - facet: type - value: [station-obs, reconstruction, forecast] + value: [ station-obs, reconstruction, forecast ] - facet: processing_level value: raw folders: - type - domain - institution - - [source, version] + - [ source, version ] - (member) - frequency - variable @@ -66,7 +67,7 @@ original-sims-ba: folders: - type - processing_level - - [bias_adjust_project, version] + - [ bias_adjust_project, version ] - mip_era - activity - domain @@ -91,7 +92,7 @@ original-hydro-reconstruction: - hydrology_source - (hydrology_member) - institution - - [source, version] + - [ source, version ] - (member) - frequency - variable @@ -131,7 +132,7 @@ original-hydro-sims-ba: - hydrology_source - (hydrology_member) - processing_level - - [bias_adjust_project, version] + - [ bias_adjust_project, version ] - mip_era - activity - domain @@ -159,7 +160,7 @@ derived-sims-ba: - facet: bias_adjust_project folders: - type - - [bias_adjust_project, version] + - [ bias_adjust_project, version ] - mip_era - activity - institution @@ -198,7 +199,7 @@ derived-reconstruction: folders: - type - institution - - [source, version] + - [ source, version ] - (member) - domain - processing_level @@ -215,7 +216,7 @@ derived-hydro-sims-ba: - hydrology_project - hydrology_source - (hydrology_member) - - [bias_adjust_project, version] + - [ bias_adjust_project, version ] - mip_era - activity - institution @@ -260,7 +261,7 @@ derived-hydro-reconstruction: - hydrology_source - (hydrology_member) - institution - - [source, version] + - [ source, version ] - (member) - domain - processing_level diff --git a/src/xscen/data/fr/LC_MESSAGES/xscen.mo b/src/xscen/data/fr/LC_MESSAGES/xscen.mo new file mode 100644 index 00000000..3821b177 Binary files /dev/null and b/src/xscen/data/fr/LC_MESSAGES/xscen.mo differ diff --git a/src/xscen/data/fr/LC_MESSAGES/xscen.po b/src/xscen/data/fr/LC_MESSAGES/xscen.po new file mode 100644 index 00000000..269ebed3 --- /dev/null +++ b/src/xscen/data/fr/LC_MESSAGES/xscen.po @@ -0,0 +1,26 @@ +msgid "{window}-year climatological {operation} of {attr}." +msgstr "Moyenne {window} ans de {attr}." + +msgid "{attr1}: {kind} delta compared to {refhoriz}." +msgstr "{attr1}: Delta {kind} comparé à {refhoriz}." 
+ +msgid "Ranking of measure performance" +msgstr "Classement de performance de la mesure" + +msgid "Fraction of improved grid cells" +msgstr "Fraction de points de grille améliorés" + +msgid "Variable" +msgstr "Variable" + +msgid "Description" +msgstr "Description" + +msgid "Units" +msgstr "Unités" + +msgid "Content" +msgstr "Contenu" + +msgid "Global attributes" +msgstr "Attributs globaux" diff --git a/xscen/diagnostics.py b/src/xscen/diagnostics.py similarity index 93% rename from xscen/diagnostics.py rename to src/xscen/diagnostics.py index c916cf53..2412a5ac 100644 --- a/xscen/diagnostics.py +++ b/src/xscen/diagnostics.py @@ -7,7 +7,6 @@ from copy import deepcopy from pathlib import Path from types import ModuleType -from typing import Optional, Union import numpy as np import xarray as xr @@ -15,6 +14,12 @@ import xclim.core.dataflags from xclim.core.indicator import Indicator +# FIXME: Remove this when updating minimum xclim version to 0.53 +try: # Changed in xclim 0.53 + from xclim.core import ValidationError +except ImportError: + from xclim.core.utils import ValidationError + from .config import parse_config from .indicators import load_xclim_module from .utils import ( @@ -44,21 +49,21 @@ def _(s): @parse_config def health_checks( # noqa: C901 - ds: Union[xr.Dataset, xr.DataArray], + ds: xr.Dataset | xr.DataArray, *, - structure: Optional[dict] = None, - calendar: Optional[str] = None, - start_date: Optional[str] = None, - end_date: Optional[str] = None, - variables_and_units: Optional[dict] = None, - cfchecks: Optional[dict] = None, - freq: Optional[str] = None, - missing: Optional[Union[dict, str, list]] = None, - flags: Optional[dict] = None, - flags_kwargs: Optional[dict] = None, + structure: dict | None = None, + calendar: str | None = None, + start_date: str | None = None, + end_date: str | None = None, + variables_and_units: dict | None = None, + cfchecks: dict | None = None, + freq: str | None = None, + missing: dict | str | list | None = None, + flags: dict | None = None, + flags_kwargs: dict | None = None, return_flags: bool = False, - raise_on: Optional[list] = None, -) -> Union[None, xr.Dataset]: + raise_on: list | None = None, +) -> None | xr.Dataset: """ Perform a series of health checks on the dataset. Be aware that missing data checks and flag checks can be slow. @@ -174,7 +179,7 @@ def _message(): # Check the calendar if calendar is not None: - cal = xc.core.calendar.get_calendar(ds.time) + cal = ds.time.dt.calendar if xc.core.calendar.common_calendar([calendar]).replace( "default", "standard" ) != xc.core.calendar.common_calendar([cal]).replace("default", "standard"): @@ -210,7 +215,7 @@ def _message(): with xc.set_options(data_validation="raise"): try: xc.core.units.check_units(ds[v], variables_and_units[v]) - except xc.core.utils.ValidationError as e: + except ValidationError as e: _error(f"'{v}' ValidationError: {e}", "variables_and_units") _error( f"The variable '{v}' does not have the expected units '{variables_and_units[v]}'. Received '{ds[v].attrs['units']}'.", @@ -232,7 +237,7 @@ def _message(): with xc.set_options(cf_compliance="raise"): try: getattr(xc.core.cfchecks, check)(**cfchecks[v][check]) - except xc.core.utils.ValidationError as e: + except ValidationError as e: _error(f"'{v}' ValidationError: {e}", "cfchecks") if freq is not None: @@ -270,9 +275,10 @@ def _message(): "missing", ) else: - logger.info( + msg = ( f"Variable '{v}' has no time dimension. 
The missing data check will be skipped.", ) + logger.info(msg) if flags is not None: if return_flags: @@ -303,18 +309,18 @@ def _message(): @parse_config def properties_and_measures( # noqa: C901 ds: xr.Dataset, - properties: Union[ - str, - os.PathLike, - Sequence[Indicator], - Sequence[tuple[str, Indicator]], - ModuleType, - ], - period: Optional[list[str]] = None, + properties: ( + str + | os.PathLike + | Sequence[Indicator] + | Sequence[tuple[str, Indicator]] + | ModuleType + ), + period: list[str] | None = None, unstack: bool = False, - rechunk: Optional[dict] = None, - dref_for_measure: Optional[xr.Dataset] = None, - change_units_arg: Optional[dict] = None, + rechunk: dict | None = None, + dref_for_measure: xr.Dataset | None = None, + change_units_arg: dict | None = None, to_level_prop: str = "diag-properties", to_level_meas: str = "diag-measures", ) -> tuple[xr.Dataset, xr.Dataset]: @@ -361,7 +367,7 @@ def properties_and_measures( # noqa: C901 -------- xclim.sdba.properties, xclim.sdba.measures, xclim.core.indicator.build_indicator_module_from_yaml """ - if isinstance(properties, (str, Path)): + if isinstance(properties, str | Path): logger.debug("Loading properties module.") module = load_xclim_module(properties) properties = module.iter_indicators() @@ -373,7 +379,8 @@ def properties_and_measures( # noqa: C901 except TypeError: N = None else: - logger.info(f"Computing {N} properties.") + msg = f"Computing {N} properties." + logger.info(msg) period = standardize_periods(period, multiple=False) # select period for ds @@ -405,7 +412,8 @@ def properties_and_measures( # noqa: C901 else: iden = ind.identifier # Make the call to xclim - logger.info(f"{i} - Computing {iden}.") + msg = f"{i} - Computing {iden}." + logger.info(msg) out = ind(ds=ds) vname = out.name prop[vname] = out @@ -443,7 +451,7 @@ def properties_and_measures( # noqa: C901 def measures_heatmap( - meas_datasets: Union[list[xr.Dataset], dict], to_level: str = "diag-heatmap" + meas_datasets: list[xr.Dataset] | dict, to_level: str = "diag-heatmap" ) -> xr.Dataset: """Create a heatmap to compare the performance of the different datasets. @@ -525,7 +533,7 @@ def measures_heatmap( def measures_improvement( - meas_datasets: Union[list[xr.Dataset], dict], to_level: str = "diag-improved" + meas_datasets: list[xr.Dataset] | dict, to_level: str = "diag-improved" ) -> xr.Dataset: """ Calculate the fraction of improved grid points for each property between two datasets of measures. 
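Each keyword of `health_checks` toggles one family of checks, and `raise_on` selects which failures become errors instead of warnings. A small, self-contained sketch (the checks requested here are illustrative, not exhaustive):

```python
import numpy as np
import pandas as pd
import xarray as xr

from xscen.diagnostics import health_checks

# A tiny daily dataset to run the checks on.
ds = xr.Dataset(
    {"tas": ("time", 280.0 + np.zeros(365), {"units": "K"})},
    coords={"time": pd.date_range("1950-01-01", periods=365, freq="D")},
)

health_checks(
    ds,
    variables_and_units={"tas": "K"},
    freq="D",
    raise_on=["variables_and_units"],   # any other failed check only warns
)
```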
diff --git a/xscen/ensembles.py b/src/xscen/ensembles.py similarity index 94% rename from xscen/ensembles.py rename to src/xscen/ensembles.py index fa9fe392..b1897c9b 100644 --- a/xscen/ensembles.py +++ b/src/xscen/ensembles.py @@ -7,7 +7,6 @@ from copy import deepcopy from itertools import chain, groupby from pathlib import Path -from typing import Optional, Union import numpy as np import xarray as xr @@ -16,6 +15,7 @@ from .catalog import DataCatalog from .catutils import generate_id from .config import parse_config +from .indicators import compute_indicators from .regrid import regrid_dataset from .spatial import subset from .utils import clean_up, get_cat_attrs @@ -32,17 +32,17 @@ @parse_config def ensemble_stats( # noqa: C901 - datasets: Union[ - dict, - list[Union[str, os.PathLike]], - list[xr.Dataset], - list[xr.DataArray], - xr.Dataset, - ], + datasets: ( + dict + | list[str | os.PathLike] + | list[xr.Dataset] + | list[xr.DataArray] + | xr.Dataset + ), statistics: dict, *, - create_kwargs: Optional[dict] = None, - weights: Optional[xr.DataArray] = None, + create_kwargs: dict | None = None, + weights: xr.DataArray | None = None, common_attrs_only: bool = True, to_level: str = "ensemble", ) -> xr.Dataset: @@ -108,7 +108,7 @@ def ensemble_stats( # noqa: C901 statistics = deepcopy(statistics) # to avoid modifying the original dictionary # if input files are .zarr, change the engine automatically - if isinstance(datasets, list) and isinstance(datasets[0], (str, os.PathLike)): + if isinstance(datasets, list) and isinstance(datasets[0], str | os.PathLike): path = Path(datasets[0]) if path.suffix == ".zarr": create_kwargs.setdefault("engine", "zarr") @@ -134,9 +134,8 @@ def ensemble_stats( # noqa: C901 for stat in statistics_to_compute: stats_kwargs = deepcopy(statistics.get(stat) or {}) - logger.info( - f"Calculating {stat} from an ensemble of {len(ens.realization)} simulations." - ) + msg = f"Calculating {stat} from an ensemble of {len(ens.realization)} simulations." + logger.info(msg) # Workaround for robustness_categories real_stat = None @@ -182,9 +181,8 @@ def ensemble_stats( # noqa: C901 f"{v} is a delta, but 'ref' was still specified." ) if delta_kind in ["rel.", "relative", "*", "/"]: - logging.info( - f"Relative delta detected for {v}. Applying 'v - 1' before change_significance." - ) + msg = f"Relative delta detected for {v}. Applying 'v - 1' before change_significance." + logging.info(msg) ens_v = ens[v] - 1 else: ens_v = ens[v] @@ -248,15 +246,14 @@ def ensemble_stats( # noqa: C901 def generate_weights( # noqa: C901 - datasets: Union[dict, list], + datasets: dict | list, *, independence_level: str = "model", balance_experiments: bool = False, - attribute_weights: Optional[dict] = None, + attribute_weights: dict | None = None, skipna: bool = True, - v_for_skipna: Optional[str] = None, + v_for_skipna: str | None = None, standardize: bool = False, - experiment_weights: bool = False, ) -> xr.DataArray: """Use realization attributes to automatically generate weights along the 'realization' dimension. @@ -294,8 +291,6 @@ def generate_weights( # noqa: C901 Variable to use for skipna=False. If None, the first variable in the first dataset is used. standardize : bool If True, the weights are standardized to sum to 1 (per timestep/horizon, if skipna=False). - experiment_weights : bool - Deprecated. Use balance_experiments instead. 
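`ensemble_stats` maps names of `xclim.ensembles` statistics to their keyword arguments. A toy example with three in-memory members (the statistic names are assumed to exist in the installed xclim version):

```python
import numpy as np
import pandas as pd
import xarray as xr
import xscen as xs

# Three toy ensemble members sharing a time axis.
time = pd.date_range("2000-01-01", periods=10)
members = [
    xr.Dataset({"tas": ("time", 280.0 + i + np.arange(10.0))}, coords={"time": time})
    for i in range(3)
]

ens_stats = xs.ensemble_stats(
    datasets=members,
    statistics={
        "ensemble_mean_std_max_min": None,
        "ensemble_percentiles": {"values": [10, 50, 90], "split": False},
    },
)
```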
Notes ----- @@ -312,23 +307,9 @@ def generate_weights( # noqa: C901 xr.DataArray Weights along the 'realization' dimension, or 2D weights along the 'realization' and 'time/horizon' dimensions if skipna=False. """ - if experiment_weights is True: - warnings.warn( - "`experiment_weights` has been renamed and will be removed in a future release. Use `balance_experiments` instead.", - category=FutureWarning, - ) - balance_experiments = True - if isinstance(datasets, list): datasets = {i: datasets[i] for i in range(len(datasets))} - if independence_level == "all": - warnings.warn( - "The independence level 'all' is deprecated and will be removed in a future version. Use 'model' instead.", - category=FutureWarning, - ) - independence_level = "model" - if independence_level not in ["model", "GCM", "institution"]: raise ValueError( f"'independence_level' should be between 'model', 'GCM', and 'institution', received {independence_level}." @@ -336,9 +317,8 @@ def generate_weights( # noqa: C901 if skipna is False: if v_for_skipna is None: v_for_skipna = list(datasets[list(datasets.keys())[0]].data_vars)[0] - logger.info( - f"Using '{v_for_skipna}' as the variable to check for missing values." - ) + msg = f"Using '{v_for_skipna}' as the variable to check for missing values." + logger.info(msg) # Check if any dataset has dimensions that are not 'time' or 'horizon' other_dims = { @@ -827,17 +807,18 @@ def _partition_from_catalog( def build_partition_data( - datasets: Union[dict, list[xr.Dataset]], + datasets: dict | list[xr.Dataset], partition_dim: list[str] = ["realization", "experiment", "bias_adjust_project"], - subset_kw: dict = None, - regrid_kw: dict = None, - indicators_kw: dict = None, - calendar_kw: dict = None, - rename_dict: dict = None, - to_dataset_kw: dict = None, + subset_kw: dict | None = None, + regrid_kw: dict | None = None, + indicators_kw: dict | None = None, + calendar_kw: dict | None = None, + rename_dict: dict | None = None, + to_dataset_kw: dict | None = None, to_level: str = "partition-ensemble", ): - """Get the input for the xclim partition functions. + """ + Get the input for the xclim partition functions. From a list or dictionary of datasets, create a single dataset with `partition_dim` dimensions (and time) to pass to one of the xclim partition functions @@ -862,11 +843,11 @@ def build_partition_data( Components of the partition. They will become the dimension of the output. The default is ['source', 'experiment', 'bias_adjust_project']. For source, the dimension will actually be institution_source_member. - subset_kw: dict + subset_kw : dict, optional Arguments to pass to `xs.spatial.subset()`. - regrid_kw: + regrid_kw : dict, optional Arguments to pass to `xs.regrid_dataset()`. - indicators_kw: + indicators_kw : dict, optional Arguments to pass to `xs.indicators.compute_indicators()`. All indicators have to be for the same frequency, in order to be put on a single time axis. calendar_kw : dict, optional @@ -877,7 +858,7 @@ def build_partition_data( This is the same behavior as `calendar` in xclim.create_ensemble. For conversions involving '360_day', the align_on='date' option is used by default. If False, no conversion is done. - rename_dict: + rename_dict : dict, optional Dictionary to rename the dimensions from xscen names to xclim names. The default is {'source': 'model', 'bias_adjust_project': 'downscaling', 'experiment': 'scenario'}. 
to_level: str @@ -891,7 +872,6 @@ def build_partition_data( See Also -------- xclim.ensembles - """ # TODO: add warning if both realization and source in partition_dim if isinstance(datasets, dict): @@ -915,16 +895,6 @@ def build_partition_data( "datasets should be a list or a dictionary of xarray datasets or a xscen.DataCatalog" ) - # # convert calendars - # if isinstance(calendar_kw, dict): - # common_cal = xc.core.calendar.common_calendar(calendars, join="outer") - # calendar_kw.setdefault("target", common_cal) - # calendar_kw.setdefault("align_on", "date") - # list_ds = [ - # xc.core.calendar.convert_calendar(ds, **calendar_kw) for ds in list_ds - # ] - # ens = xr.merge(list_ds) - rename_dict = rename_dict or {} rename_dict.setdefault("realization", "model") rename_dict.setdefault("source", "model") @@ -941,11 +911,11 @@ def build_partition_data( @parse_config def reduce_ensemble( - data: Union[xr.DataArray, dict, list, xr.Dataset], + data: xr.DataArray | dict | list | xr.Dataset, method: str, *, - horizons: Optional[list[str]] = None, - create_kwargs: Optional[dict] = None, + horizons: list[str] | None = None, + create_kwargs: dict | None = None, **kwargs, ): r"""Reduce an ensemble of simulations using clustering algorithms from xclim.ensembles. @@ -984,7 +954,7 @@ def reduce_ensemble( If the indicators are a mix of yearly, seasonal, and monthly, they should be stacked on the same time/horizon axis and put in the same dataset. You can use py:func:`xscen.utils.unstack_dates` on seasonal or monthly indicators to this end. """ - if isinstance(data, (list, dict)): + if isinstance(data, list | dict): data = ensembles.create_ensemble(datasets=data, **(create_kwargs or {})) if horizons: if "horizon" not in data.dims: diff --git a/xscen/extract.py b/src/xscen/extract.py similarity index 93% rename from xscen/extract.py rename to src/xscen/extract.py index f5abc8ef..1bab39c4 100644 --- a/xscen/extract.py +++ b/src/xscen/extract.py @@ -6,10 +6,9 @@ import re import warnings from collections import defaultdict -from collections.abc import Sequence +from collections.abc import Callable, Sequence from copy import deepcopy from pathlib import Path -from typing import Callable, Optional, Union import numpy as np import pandas as pd @@ -49,16 +48,16 @@ def extract_dataset( # noqa: C901 catalog: DataCatalog, *, - variables_and_freqs: Optional[dict] = None, - periods: Optional[Union[list[str], list[list[str]]]] = None, - region: Optional[dict] = None, + variables_and_freqs: dict | None = None, + periods: list[str] | list[list[str]] | None = None, + region: dict | None = None, to_level: str = "extracted", ensure_correct_time: bool = True, - xr_open_kwargs: Optional[dict] = None, - xr_combine_kwargs: Optional[dict] = None, - preprocess: Optional[Callable] = None, - resample_methods: Optional[dict] = None, - mask: Union[bool, xr.Dataset, xr.DataArray] = False, + xr_open_kwargs: dict | None = None, + xr_combine_kwargs: dict | None = None, + preprocess: Callable | None = None, + resample_methods: dict | None = None, + mask: bool | xr.Dataset | xr.DataArray = False, ) -> dict: """ Take one element of the output of `search_data_catalogs` and returns a dataset, @@ -243,10 +242,8 @@ def extract_dataset( # noqa: C901 if pd.to_timedelta( CV.xrfreq_to_timedelta(catalog[key].df["xrfreq"].iloc[0]) ) < pd.to_timedelta(CV.xrfreq_to_timedelta(xrfreq)): - logger.info( - f"Resampling {var_name} from [{catalog[key].df['xrfreq'].iloc[0]}]" - f" to [{xrfreq}]." 
- ) + msg = f"Resampling {var_name} from [{catalog[key].df['xrfreq'].iloc[0]}] to [{xrfreq}]." + logger.info(msg) ds = ds.assign( { var_name: resample( @@ -281,7 +278,9 @@ def extract_dataset( # noqa: C901 slices = [] for period in periods_extract: slices.extend([ds.sel({"time": slice(period[0], period[1])})]) - ds = xr.concat(slices, dim="time", **xr_combine_kwargs) + ds = xr.concat( + slices, dim="time", **xr_kwargs["xarray_combine_by_coords_kwargs"] + ) # subset to the region if region is not None: @@ -322,9 +321,9 @@ def resample( # noqa: C901 da: xr.DataArray, target_frequency: str, *, - ds: Optional[xr.Dataset] = None, - method: Optional[str] = None, - missing: Optional[Union[str, dict]] = None, + ds: xr.Dataset | None = None, + method: str | None = None, + missing: str | dict | None = None, ) -> xr.DataArray: """Aggregate variable to the target frequency. @@ -371,19 +370,20 @@ def resample( # noqa: C901 and var_name in CV.resampling_methods.dict[target_frequency] ): method = CV.resampling_methods(target_frequency)[var_name] - logger.info( - f"Resampling method for {var_name}: '{method}', based on variable name and frequency." - ) + msg = f"Resampling method for {var_name}: '{method}', based on variable name and frequency." + logger.info(msg) elif var_name in CV.resampling_methods.dict["any"]: method = CV.resampling_methods("any")[var_name] - logger.info( + msg = ( f"Resampling method for {var_name}: '{method}', based on variable name." ) + logger.info(msg) else: method = "mean" - logger.info(f"Resampling method for {var_name} defaulted to: 'mean'.") + msg = f"Resampling method for {var_name} defaulted to: 'mean'." + logger.info(msg) weights = None if ( @@ -541,23 +541,23 @@ def resample( # noqa: C901 @parse_config def search_data_catalogs( # noqa: C901 - data_catalogs: Union[ - str, os.PathLike, DataCatalog, list[Union[str, os.PathLike, DataCatalog]] - ], + data_catalogs: ( + str | os.PathLike | DataCatalog | list[str | os.PathLike | DataCatalog] + ), variables_and_freqs: dict, *, - other_search_criteria: Optional[dict] = None, - exclusions: Optional[dict] = None, + other_search_criteria: dict | None = None, + exclusions: dict | None = None, match_hist_and_fut: bool = False, - periods: Optional[Union[list[str], list[list[str]]]] = None, - coverage_kwargs: Optional[dict] = None, - id_columns: Optional[list[str]] = None, + periods: list[str] | list[list[str]] | None = None, + coverage_kwargs: dict | None = None, + id_columns: list[str] | None = None, allow_resampling: bool = False, allow_conversion: bool = False, - conversion_yaml: Optional[str] = None, - restrict_resolution: Optional[str] = None, - restrict_members: Optional[dict] = None, - restrict_warming_level: Optional[Union[dict, bool]] = None, + conversion_yaml: str | None = None, + restrict_resolution: str | None = None, + restrict_members: dict | None = None, + restrict_warming_level: dict | bool | None = None, ) -> dict: """Search through DataCatalogs. 
@@ -632,31 +632,35 @@ def search_data_catalogs( # noqa: C901 -------- intake_esm.core.esm_datastore.search """ - cat_kwargs = {} - if allow_conversion: - if conversion_yaml is None: - conversion_yaml = Path(__file__).parent / "xclim_modules" / "conversions" - cat_kwargs = { - "registry": registry_from_module(load_xclim_module(conversion_yaml)) - } - # Cast single items to a list - if isinstance(data_catalogs, (str, os.PathLike, DataCatalog)): + if isinstance(data_catalogs, str | os.PathLike | DataCatalog): data_catalogs = [data_catalogs] + # Open the catalogs given as paths - for i, dc in enumerate(data_catalogs): - if isinstance(dc, (str, os.PathLike)): - data_catalogs[i] = ( - DataCatalog(dc, **cat_kwargs) + data_catalogs = [ + ( + dc + if not isinstance(dc, str | os.PathLike) + else ( + DataCatalog(dc) if Path(dc).suffix == ".json" else DataCatalog.from_df(dc) ) + ) + for dc in data_catalogs + ] - if not isinstance(data_catalogs, list) or not all( - isinstance(dc, DataCatalog) for dc in data_catalogs - ): + if not all(isinstance(dc, DataCatalog) for dc in data_catalogs): raise ValueError("Catalogs type not recognized.") + cat_kwargs = {} + if allow_conversion: + if conversion_yaml is None: + conversion_yaml = Path(__file__).parent / "xclim_modules" / "conversions" + cat_kwargs = { + "registry": registry_from_module(load_xclim_module(conversion_yaml)) + } + # Prepare a unique catalog to search from, with the DerivedCat added if required catalog = DataCatalog( { @@ -665,7 +669,8 @@ def search_data_catalogs( # noqa: C901 }, **cat_kwargs, ) - logger.info(f"Catalog opened: {catalog} from {len(data_catalogs)} files.") + msg = f"Catalog opened: {catalog} from {len(data_catalogs)} files." + logger.info(msg) if match_hist_and_fut: logger.info("Dispatching historical dataset to future experiments.") @@ -678,15 +683,15 @@ def search_data_catalogs( # noqa: C901 catalog.esmcat._df = pd.concat([catalog.df, ex.df]).drop_duplicates( keep=False ) - logger.info( - f"Removing {len(ex.df)} assets based on exclusion dict '{k}': {exclusions[k]}." - ) + msg = f"Removing {len(ex.df)} assets based on exclusion dict '{k}': {exclusions[k]}." + logger.info(msg) full_catalog = deepcopy(catalog) # Used for searching for fixed fields if other_search_criteria: catalog = catalog.search(**other_search_criteria) - logger.info( + msg = ( f"{len(catalog.df)} assets matched the criteria : {other_search_criteria}." ) + logger.info(msg) if restrict_warming_level: if isinstance(restrict_warming_level, bool): restrict_warming_level = {} @@ -714,7 +719,8 @@ def search_data_catalogs( # noqa: C901 coverage_kwargs = coverage_kwargs or {} periods = standardize_periods(periods) - logger.info(f"Iterating over {len(catalog.unique('id'))} potential datasets.") + msg = f"Iterating over {len(catalog.unique('id'))} potential datasets." 
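In a workflow, the dictionary returned by `search_data_catalogs` is then fed, entry by entry, to `extract_dataset`. A sketch of that chain, with a placeholder catalog path and illustrative criteria:

```python
import xscen as xs

cat_dict = xs.search_data_catalogs(
    data_catalogs="project_catalog.json",        # placeholder path; a single path is accepted
    variables_and_freqs={"tas": "D"},
    other_search_criteria={"experiment": ["ssp245", "ssp585"]},
    match_hist_and_fut=True,    # stitch 'historical' onto each matching future experiment
    allow_conversion=True,      # allow variables derivable through the conversions module
    allow_resampling=True,      # allow coarser frequencies computed from finer ones
)

# Each entry is a per-dataset catalog that extract_dataset can consume.
for ds_id, sub_cat in cat_dict.items():
    ds_dict = xs.extract_dataset(
        catalog=sub_cat,
        periods=["1950", "2100"],
        xr_open_kwargs={"chunks": {}},
    )
    # one extracted dataset per output frequency (here daily, so the key should be "D")
    ds = ds_dict["D"]
```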
+ logger.info(msg) # Loop on each dataset to assess whether they have all required variables # And select best freq/timedelta for each catalogs = {} @@ -776,9 +782,8 @@ def search_data_catalogs( # noqa: C901 varcat = scat.search( variable=var_id, require_all_on=["id", "xrfreq"] ) - logger.debug( - f"At var {var_id}, after search cat has {varcat.derivedcat.keys()}" - ) + msg = f"At var {var_id}, after search cat has {varcat.derivedcat.keys()}" + logger.debug(msg) # TODO: Temporary fix until this is changed in intake_esm varcat._requested_variables_true = [var_id] varcat._dependent_variables = list( @@ -845,9 +850,8 @@ def search_data_catalogs( # noqa: C901 varcat.esmcat._df = pd.DataFrame() if varcat.df.empty: - logger.debug( - f"Dataset {sim_id} doesn't have all needed variables (missing at least {var_id})." - ) + msg = f"Dataset {sim_id} doesn't have all needed variables (missing at least {var_id})." + logger.debug(msg) break if "timedelta" in varcat.df.columns: varcat.df.drop(columns=["timedelta"], inplace=True) @@ -863,9 +867,8 @@ def search_data_catalogs( # noqa: C901 catalogs[sim_id]._requested_periods = periods if len(catalogs) > 0: - logger.info( - f"Found {len(catalogs)} with all variables requested and corresponding to the criteria." - ) + msg = f"Found {len(catalogs)} with all variables requested and corresponding to the criteria." + logger.info(msg) else: logger.warning("Found no match corresponding to the search criteria.") @@ -880,17 +883,17 @@ def search_data_catalogs( # noqa: C901 @parse_config def get_warming_level( # noqa: C901 - realization: Union[ - xr.Dataset, xr.DataArray, dict, pd.Series, pd.DataFrame, str, list - ], + realization: ( + xr.Dataset | xr.DataArray | dict | pd.Series | pd.DataFrame | str | list + ), wl: float, *, window: int = 20, - tas_baseline_period: Optional[Sequence[str]] = None, + tas_baseline_period: Sequence[str] | None = None, ignore_member: bool = False, - tas_src: Optional[Union[str, os.PathLike]] = None, + tas_src: str | os.PathLike | None = None, return_horizon: bool = True, -) -> Union[dict, list[str], str]: +) -> dict | list[str] | str: """ Use the IPCC Atlas method to return the window of time over which the requested level of global warming is first reached. @@ -943,7 +946,7 @@ def get_warming_level( # noqa: C901 FIELDS = ["mip_era", "source", "experiment", "member"] - if isinstance(realization, (xr.Dataset, str, dict, pd.Series)): + if isinstance(realization, xr.Dataset | str | dict | pd.Series): reals = [realization] elif isinstance(realization, pd.DataFrame): reals = (row for i, row in realization.iterrows()) @@ -990,7 +993,7 @@ def get_warming_level( # noqa: C901 info_models.append(info) # open nc - tas = xr.open_dataset(tas_src, engine="h5netcdf").tas + tas = xr.open_dataset(tas_src).tas def _get_warming_level(model): # choose colum based in ds cat attrs, +'$' to ensure a full match (matches end-of-string) @@ -1015,9 +1018,10 @@ def _get_warming_level(model): ) tas_sel = tas.isel(simulation=candidates.argmax()) selected = "_".join([tas_sel[c].item() for c in FIELDS]) - logger.debug( + msg = ( f"Computing warming level +{wl}°C for {model} from simulation: {selected}." 
) + logger.debug(msg) # compute reference temperature for the warming and difference from reference yearly_diff = tas_sel - tas_sel.sel(time=slice(*tas_baseline_period)).mean() @@ -1033,10 +1037,11 @@ def _get_warming_level(model): yrs = rolling_diff.where(rolling_diff >= wl, drop=True) if yrs.size == 0: - logger.info( + msg = ( f"Global warming level of +{wl}C is not reached by the last year " f"({tas.time[-1].dt.year.item()}) of the provided 'tas_src' database for {selected}." ) + logger.info(msg) return [None, None] if return_horizon else None yr = yrs.isel(time=0).time.dt.year.item() @@ -1066,11 +1071,11 @@ def _get_warming_level(model): @parse_config def subset_warming_level( ds: xr.Dataset, - wl: Union[float, Sequence[float]], + wl: float | Sequence[float], to_level: str = "warminglevel-{wl}vs{period0}-{period1}", - wl_dim: Union[str, bool] = "+{wl}Cvs{period0}-{period1}", + wl_dim: str | bool = "+{wl}Cvs{period0}-{period1}", **kwargs, -) -> Optional[xr.Dataset]: +) -> xr.Dataset | None: r""" Subsets the input dataset with only the window of time over which the requested level of global warming is first reached, using the IPCC Atlas method. @@ -1122,7 +1127,7 @@ def subset_warming_level( # Fake time generation is needed : real is a dim or multiple levels if ( fake_time is None - and not isinstance(wl, (int, float)) + and not isinstance(wl, int | float) or "realization" in ds.dims ): freq = xr.infer_freq(ds.time) @@ -1136,7 +1141,7 @@ def subset_warming_level( ) # If we got a wl sequence, call ourself multiple times and concatenate - if not isinstance(wl, (int, float)): + if not isinstance(wl, int | float): if not wl_dim or (isinstance(wl_dim, str) and "{wl}" not in wl_dim): raise ValueError( "`wl_dim` must be True or a template string including '{wl}' if multiple levels are passed." @@ -1230,8 +1235,8 @@ def subset_warming_level( else: # WL not reached, not in ds, or not fully contained in ds.time if wl_not_reached: - ds_wl = ds.isel(time=slice(0, fake_time.size)) * np.NaN - wlbnds = (("warminglevel", "wl_bounds"), [[np.NaN, np.NaN]]) + ds_wl = ds.isel(time=slice(0, fake_time.size)) * np.nan + wlbnds = (("warminglevel", "wl_bounds"), [[np.nan, np.nan]]) else: wlbnds = ( ("warminglevel", "wl_bounds"), @@ -1267,7 +1272,7 @@ def subset_warming_level( def _dispatch_historical_to_future( - catalog: DataCatalog, id_columns: Optional[list[str]] = None + catalog: DataCatalog, id_columns: list[str] | None = None ) -> DataCatalog: """Update a DataCatalog by recopying each "historical" entry to its corresponding future experiments. @@ -1331,9 +1336,9 @@ def _dispatch_historical_to_future( "For example, xscen expects experiment `historical` to have `CMIP` activity " "and experiments `sspXYZ` to have `ScenarioMIP` activity. " ) - for activity_id in set(sdf.activity) - {"HighResMip", np.NaN}: + for activity_id in set(sdf.activity) - {"HighResMip", np.nan}: sub_sdf = sdf[sdf.activity == activity_id] - for exp_id in set(sub_sdf.experiment) - {"historical", "piControl", np.NaN}: + for exp_id in set(sub_sdf.experiment) - {"historical", "piControl", np.nan}: exp_hist = hist.copy() exp_hist["experiment"] = exp_id exp_hist["activity"] = activity_id @@ -1381,7 +1386,7 @@ def _dispatch_historical_to_future( def _restrict_by_resolution( - catalogs: dict, restrictions: str, id_columns: Optional[list[str]] = None + catalogs: dict, restrictions: str, id_columns: list[str] | None = None ) -> dict: """Update the results from search_data_catalogs by removing simulations with multiple resolutions available. 
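Both entry points share the same lookup: `get_warming_level` returns the window for a given simulation, and `subset_warming_level` extracts that window from a dataset, stacking several levels along a "warminglevel" dimension when a list is given. A sketch with an example realization string; `ds` is assumed to exist and to carry the xscen catalog attributes identifying its simulation:

```python
import xscen as xs

# The simplest realization format is the underscore-joined
# "{mip_era}_{source}_{experiment}_{member}" string.
horizon = xs.get_warming_level("CMIP6_CanESM5_ssp585_r1i1p1f1", wl=2, window=20)
# -> a [start_year, end_year] pair of strings, or [None, None] if +2 °C is never reached

# `ds` is assumed to be an xr.Dataset with "cat:" attributes (mip_era, source, experiment, member).
ds_wl = xs.subset_warming_level(ds, wl=[1.5, 2, 3], window=30)
# -> a 'warminglevel' dimension of size 3, with labels built from the default template
#    (e.g. "+1.5Cvs1850-1900") and all-NaN slices for levels that are never reached.
```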
@@ -1422,7 +1427,8 @@ def _restrict_by_resolution( domains = pd.unique(df_sim["domain"]) if len(domains) > 1: - logger.info(f"Dataset {i} appears to have multiple resolutions.") + msg = f"Dataset {i} appears to have multiple resolutions." + logger.info(msg) # For CMIP, the order is dictated by a list of grid labels if "MIP" in pd.unique(df_sim["activity"])[0]: @@ -1498,10 +1504,8 @@ def _restrict_by_resolution( ) else: - logger.warning( - f"Dataset {i} seems to have multiple resolutions, " - "but its activity is not yet recognized or supported." - ) + msg = f"Dataset {i} seems to have multiple resolutions, but its activity is not yet recognized or supported." + logger.warning(msg) chosen = list(domains) pass @@ -1514,14 +1518,15 @@ def _restrict_by_resolution( ) for k in to_remove: - logger.info(f"Removing {k} from the results.") + msg = f"Removing {k} from the results." + logger.info(msg) catalogs.pop(k) return catalogs def _restrict_multimembers( - catalogs: dict, restrictions: dict, id_columns: Optional[list[str]] = None + catalogs: dict, restrictions: dict, id_columns: list[str] | None = None ): """Update the results from search_data_catalogs by removing simulations with multiple members available. @@ -1557,9 +1562,8 @@ def _restrict_multimembers( members = pd.unique(df_sim["member"]) if len(members) > 1: - logger.info( - f"Dataset {i} has {len(members)} valid members. Restricting as per requested." - ) + msg = f"Dataset {i} has {len(members)} valid members. Restricting as per requested." + logger.info(msg) if "ordered" in restrictions: members = natural_sort(members)[0 : restrictions["ordered"]] @@ -1577,7 +1581,8 @@ def _restrict_multimembers( ) for k in to_remove: - logger.info(f"Removing {k} from the results.") + msg = f"Removing {k} from the results." + logger.info(msg) catalogs.pop(k) return catalogs @@ -1604,7 +1609,6 @@ def _restrict_wl(df: pd.DataFrame, restrictions: dict): to_keep = get_warming_level(df, return_horizon=False, **restrictions).notnull() removed = pd.unique(df[~to_keep]["id"]) df = df[to_keep] - logger.info( - f"Removing the following datasets because of the restriction for warming levels: {list(removed)}" - ) + msg = f"Removing the following datasets because of the restriction for warming levels: {list(removed)}" + logger.info(msg) return df diff --git a/xscen/indicators.py b/src/xscen/indicators.py similarity index 65% rename from xscen/indicators.py rename to src/xscen/indicators.py index f47b2b12..35bbe40d 100644 --- a/xscen/indicators.py +++ b/src/xscen/indicators.py @@ -6,27 +6,26 @@ from functools import partial from pathlib import Path from types import ModuleType -from typing import Optional, Union +import pandas as pd import xarray as xr import xclim as xc from intake_esm import DerivedVariableRegistry +from xclim.core.calendar import construct_offset, parse_offset from xclim.core.indicator import Indicator from yaml import safe_load from xscen.config import parse_config from .catutils import parse_from_ds -from .utils import CV, standardize_periods +from .utils import CV, rechunk_for_resample, standardize_periods logger = logging.getLogger(__name__) __all__ = ["compute_indicators", "load_xclim_module", "registry_from_module"] -def load_xclim_module( - filename: Union[str, os.PathLike], reload: bool = False -) -> ModuleType: +def load_xclim_module(filename: str | os.PathLike, reload: bool = False) -> ModuleType: """Return the xclim module described by the yaml file (or group of yaml, jsons and py). 
Parameters @@ -62,20 +61,57 @@ def load_xclim_module( return xc.build_indicator_module_from_yaml(filename) +def get_indicator_outputs(ind: xc.core.indicator.Indicator, in_freq: str): + """Returns the variables names and resampling frequency of a given indicator. + + CAUTION : Some indicators will build the variable name on-the-fly according to the arguments. + This function will return the template string (with "{}"). + + Parameters + ---------- + ind : Indicator + An xclim indicator + in_freq : str + The data's sampling frequency. + + Returns + ------- + var_names : list + List of variable names + freq : str + Indicator resampling frequency. "fx" for time-reducing indicator. + """ + if isinstance(ind, xc.core.indicator.ReducingIndicator): + frq = "fx" + elif not isinstance(ind, xc.core.indicator.ResamplingIndicator): + frq = in_freq + else: + frq = ( + ind.injected_parameters["freq"] + if "freq" in ind.injected_parameters + else ind.parameters["freq"].default + ) + if frq == "YS": + frq = "YS-JAN" + var_names = [cfa["var_name"] for cfa in ind.cf_attrs] + return var_names, frq + + @parse_config def compute_indicators( # noqa: C901 ds: xr.Dataset, - indicators: Union[ - str, - os.PathLike, - Sequence[Indicator], - Sequence[tuple[str, Indicator]], - ModuleType, - ], + indicators: ( + str + | os.PathLike + | Sequence[Indicator] + | Sequence[tuple[str, Indicator]] + | ModuleType + ), *, - periods: Optional[Union[list[str], list[list[str]]]] = None, + periods: list[str] | list[list[str]] | None = None, restrict_years: bool = True, - to_level: Optional[str] = "indicators", + to_level: str | None = "indicators", + rechunk_input: bool = False, ) -> dict: """Calculate variables and indicators based on a YAML call to xclim. @@ -105,6 +141,10 @@ def compute_indicators( # noqa: C901 to_level : str, optional The processing level to assign to the output. If None, the processing level of the inputs is preserved. + rechunk_input : bool + If True, the dataset is rechunked with :py:func:`flox.xarray.rechunk_for_blockwise` + according to the resampling frequency of the indicator. Each rechunking is done + once per frequency with :py:func:`xscen.utils.rechunk_for_resample`. Returns ------- @@ -115,7 +155,7 @@ def compute_indicators( # noqa: C901 -------- xclim.indicators, xclim.core.indicator.build_indicator_module_from_yaml """ - if isinstance(indicators, (str, os.PathLike)): + if isinstance(indicators, str | os.PathLike): logger.debug("Loading indicator module.") module = load_xclim_module(indicators) indicators = module.iter_indicators() @@ -127,20 +167,12 @@ def compute_indicators( # noqa: C901 except TypeError: N = None else: - logger.info(f"Computing {N} indicators.") - - def _infer_freq_from_meta(ind): - return ( - ind.injected_parameters["freq"] - if "freq" in ind.injected_parameters - else ( - ind.parameters["freq"].default - if "freq" in ind.parameters - else ind.src_freq - ) - ) + msg = f"Computing {N} indicators." + logger.info(msg) periods = standardize_periods(periods) + in_freq = xr.infer_freq(ds.time) if "time" in ds.dims else "fx" + dss_rechunked = {} out_dict = dict() for i, ind in enumerate(indicators, 1): @@ -148,11 +180,32 @@ def _infer_freq_from_meta(ind): iden, ind = ind else: iden = ind.identifier - logger.info(f"{i} - Computing {iden}.") + msg = f"{i} - Computing {iden}." + logger.info(msg) + + _, freq = get_indicator_outputs(ind, in_freq) + + if rechunk_input and freq not in ["fx", in_freq]: + if freq not in dss_rechunked: + msg = f"Rechunking with flox for freq {freq}." 
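`get_indicator_outputs` is what lets `compute_indicators` know the output variable names and target frequency before computing anything. With a standard xclim indicator and daily input it should behave as below (an illustration only; the exact defaults depend on the installed xclim version):

```python
from xclim.indicators import atmos

from xscen.indicators import get_indicator_outputs

var_names, freq = get_indicator_outputs(atmos.tx_days_above, in_freq="D")
print(var_names, freq)
# ['tx_days_above'] YS-JAN   (the indicator's default resampling frequency)
```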
+ logger.debug(msg) + dss_rechunked[freq] = rechunk_for_resample(ds, time=freq) + else: + msg = f"Using rechunked for freq {freq}" + logger.debug(msg) + ds_in = dss_rechunked[freq] + else: + ds_in = ds if periods is None: + # Pandas as no semiannual frequency and 2Q is capricious + if freq.startswith("2Q"): + logger.debug( + "Dropping start of timeseries to ensure semiannual frequency works." + ) + ds_in = fix_semiannual(ds_in, freq) # Make the call to xclim - out = ind(ds=ds) + out = ind(ds=ds_in) # In the case of multiple outputs, merge them into a single dataset if isinstance(out, tuple): @@ -161,23 +214,18 @@ def _infer_freq_from_meta(ind): else: out = out.to_dataset() - # Infer the indicator's frequency - if "time" in out.dims: - if len(out.time) < 3: - freq = _infer_freq_from_meta(ind) - else: - freq = xr.infer_freq(out.time) - else: - freq = "fx" - if freq == "YS": - freq = "YS-JAN" - else: # Multiple time periods to concatenate concats = [] for period in periods: # Make the call to xclim - ds_subset = ds.sel(time=slice(period[0], period[1])) + ds_subset = ds_in.sel(time=slice(period[0], period[1])) + # Pandas as no semiannual frequency and 2Q is capricious + if freq.startswith("2Q"): + logger.debug( + "Dropping start of timeseries to ensure semiannual frequency works." + ) + ds_subset = fix_semiannual(ds_subset, freq) tmp = ind(ds=ds_subset) # In the case of multiple outputs, merge them into a single dataset @@ -187,21 +235,9 @@ def _infer_freq_from_meta(ind): else: tmp = tmp.to_dataset() - # Infer the indicator's frequency - if "time" in tmp.dims: - if len(tmp.time) < 3: - freq = _infer_freq_from_meta(ind) - else: - freq = xr.infer_freq(tmp.time) - else: - freq = "fx" - - if freq == "YS": - freq = "YS-JAN" # In order to concatenate time periods, the indicator still needs a time dimension if freq == "fx": tmp = tmp.assign_coords({"time": ds_subset.time[0]}) - concats.append(tmp) out = xr.concat(concats, dim="time") @@ -245,7 +281,7 @@ def _infer_freq_from_meta(ind): def registry_from_module( module: ModuleType, - registry: Optional[DerivedVariableRegistry] = None, + registry: DerivedVariableRegistry | None = None, variable_column: str = "variable", ) -> DerivedVariableRegistry: """Convert a xclim virtual indicators module to an intake_esm Derived Variable Registry. @@ -280,7 +316,7 @@ def registry_from_module( def _ensure_list(x): - if not isinstance(x, (list, tuple)): + if not isinstance(x, list | tuple): return [x] return x @@ -299,13 +335,13 @@ def func(ds, *, ind, nout): def select_inds_for_avail_vars( ds: xr.Dataset, - indicators: Union[ - str, - os.PathLike, - Sequence[Indicator], - Sequence[tuple[str, Indicator]], - ModuleType, - ], + indicators: ( + str + | os.PathLike + | Sequence[Indicator] + | Sequence[tuple[str, Indicator]] + | ModuleType + ), ) -> ModuleType: """Filter the indicators for which the necessary variables are available. 
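# Usage sketch for compute_indicators with the new rechunk_input option. Illustrative
# only: "daily_dataset.zarr" and "indicators.yml" are hypothetical inputs, and the
# returned dictionary is keyed by each indicator's output frequency.
import xarray as xr
import xscen as xs

ds = xr.open_zarr("daily_dataset.zarr")
outputs = xs.compute_indicators(ds, indicators="indicators.yml", rechunk_input=True)
annual = outputs.get("YS-JAN")  # assuming the YAML module defines annual indicators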
@@ -331,17 +367,19 @@ def select_inds_for_avail_vars( is_list_of_tuples = isinstance(indicators, list) and all( isinstance(i, tuple) for i in indicators ) - if isinstance(indicators, (str, os.PathLike)): + if isinstance(indicators, str | os.PathLike): logger.debug("Loading indicator module.") indicators = load_xclim_module(indicators, reload=True) if hasattr(indicators, "iter_indicators"): indicators = [(name, ind) for name, ind in indicators.iter_indicators()] - elif isinstance(indicators, (list, tuple)) and not is_list_of_tuples: + elif isinstance(indicators, list | tuple) and not is_list_of_tuples: indicators = [(ind.base, ind) for ind in indicators] - available_vars = { - var for var in ds.data_vars if var in xc.core.utils.VARIABLES.keys() - } + # FIXME: Remove if-else when updating minimum xclim version to 0.53 + XCVARS = ( + xc.core.VARIABLES if hasattr(xc.core, "VARIABLES") else xc.core.utils.VARIABLES + ) + available_vars = {var for var in ds.data_vars if var in XCVARS.keys()} available_inds = [ (name, ind) for var in available_vars @@ -351,3 +389,48 @@ def select_inds_for_avail_vars( return xc.core.indicator.build_indicator_module( "inds_for_avail_vars", available_inds, reload=True ) + + +def _wrap_month(m): + # Ensure the month number is between 1 and 12 + # Modulo returns 0 if m is a multiple of 12, 0 is false and we want 12. + return (m % 12) or 12 + + +def fix_semiannual(ds, freq): + """Avoid wrong start dates for semiannual frequency. + + Resampling with offsets that are multiples of a base frequency (ex: 2QS-OCT) is broken in pandas (https://github.com/pandas-dev/pandas/issues/51563). + This will cut the beggining of the dataset so it starts exactly at the beginning of the resampling period. + """ + # I hate that we have to do that + mul, b, s, anc = parse_offset(freq) + if mul != 2 or b != "Q": + raise NotImplementedError("This only fixes 2Q frequencies.") + # Get MONTH: N mapping (invert xarray's) + months_inv = xr.coding.cftime_offsets._MONTH_ABBREVIATIONS + months = dict(zip(months_inv.values(), months_inv.keys())) + + if s: + m1 = months[anc] + else: + m1 = _wrap_month(months[anc] + 1) + freq = construct_offset(mul, b, True, months_inv[m1]) + m2 = _wrap_month(m1 + 6) + + time = ds.indexes["time"] + if isinstance(time, xr.CFTimeIndex): + offset = xr.coding.cftime_offsets.to_offset(freq) + is_on_offset = offset.onOffset + else: + offset = pd.tseries.frequencies.to_offset(freq) + is_on_offset = offset.is_on_offset + + if is_on_offset(time[0]) and time[0].month in (m1, m2): + # wow, already correct! 
+ return ds + + for t in time: + if is_on_offset(t) and t.month in (m1, m2): + return ds.sel(time=(time >= t)) + raise ValueError(f"Can't find a start date that fits with frequency {freq}.") diff --git a/xscen/io.py b/src/xscen/io.py similarity index 80% rename from xscen/io.py rename to src/xscen/io.py index 11357cc8..f0c2e149 100644 --- a/xscen/io.py +++ b/src/xscen/io.py @@ -8,7 +8,7 @@ from collections.abc import Sequence from inspect import signature from pathlib import Path -from typing import Optional, Union +from zipfile import ZipFile import h5py import netCDF4 @@ -18,7 +18,6 @@ import zarr from numcodecs.bitround import BitRound from rechunker import rechunk as _rechunk -from xclim.core.calendar import get_calendar from xclim.core.options import METADATA_LOCALES from xclim.core.options import OPTIONS as XC_OPTIONS @@ -43,11 +42,16 @@ "save_to_zarr", "subset_maxsize", "to_table", + "unzip_directory", + "zip_directory", ] -def get_engine(file: Union[str, os.PathLike]) -> str: - """Use functionality of h5py to determine if a NetCDF file is compatible with h5netcdf. +def get_engine(file: str | os.PathLike) -> str: + """Determine which Xarray engine should be used to open the given file. + + The .zarr, .zarr.zip and .zip extensions are recognized as Zarr datasets, + the rest is seen as a netCDF. If the file is HDF5, the h5netcdf engine is used. Parameters ---------- @@ -60,7 +64,7 @@ def get_engine(file: Union[str, os.PathLike]) -> str: Engine to use with xarray """ # find the ideal engine for xr.open_mfdataset - if Path(file).suffix == ".zarr": + if Path(file).suffix in [".zarr", ".zip", ".zarr.zip"]: engine = "zarr" elif h5py.is_hdf5(file): engine = "h5netcdf" @@ -71,7 +75,7 @@ def get_engine(file: Union[str, os.PathLike]) -> str: def estimate_chunks( # noqa: C901 - ds: Union[str, os.PathLike, xr.Dataset], + ds: str | os.PathLike | xr.Dataset, dims: list, target_mb: float = 50, chunk_per_variable: bool = False, @@ -152,7 +156,7 @@ def _estimate_chunks(ds, target_mb, size_of_slice, rechunk_dims): out = {} # If ds is the path to a file, use NetCDF4 - if isinstance(ds, (str, os.PathLike)): + if isinstance(ds, str | os.PathLike): ds = netCDF4.Dataset(ds, "r") # Loop on variables @@ -239,7 +243,8 @@ def subset_maxsize( size_of_file = size_of_file + (varsize * dtype_size) / 1024**3 if size_of_file < maxsize_gb: - logger.info(f"Dataset is already smaller than {maxsize_gb} Gb.") + msg = f"Dataset is already smaller than {maxsize_gb} Gb." + logger.info(msg) return [ds] elif "time" in ds: @@ -256,7 +261,7 @@ def subset_maxsize( ) -def clean_incomplete(path: Union[str, os.PathLike], complete: Sequence[str]) -> None: +def clean_incomplete(path: str | os.PathLike, complete: Sequence[str]) -> None: """Delete un-catalogued variables from a zarr folder. The goal of this function is to clean up an incomplete calculation. 
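# Usage sketch for the extended extension handling in get_engine. Illustrative only:
# the file names are hypothetical, and the netCDF branch inspects the file with h5py,
# so those files must exist on disk.
from xscen.io import get_engine

get_engine("dataset.zarr")      # -> "zarr"
get_engine("dataset.zarr.zip")  # -> "zarr" (zipped stores are now recognized)
get_engine("dataset.nc")        # -> "h5netcdf" if the file is HDF5, "netcdf4" otherwise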
@@ -280,7 +285,8 @@ def clean_incomplete(path: Union[str, os.PathLike], complete: Sequence[str]) -> for fold in filter(lambda p: p.is_dir(), path.iterdir()): if fold.name not in complete: - logger.warning(f"Removing {fold} from disk") + msg = f"Removing {fold} from disk" + logger.warning(msg) sh.rmtree(fold) @@ -288,9 +294,9 @@ def _coerce_attrs(attrs): """Ensure no funky objects in attrs.""" for k in list(attrs.keys()): if not ( - isinstance(attrs[k], (str, float, int, np.ndarray)) - or isinstance(attrs[k], (tuple, list)) - and isinstance(attrs[k][0], (str, float, int)) + isinstance(attrs[k], str | float | int | np.ndarray) + or isinstance(attrs[k], tuple | list) + and isinstance(attrs[k][0], str | float | int) ): attrs[k] = str(attrs[k]) @@ -327,7 +333,7 @@ def round_bits(da: xr.DataArray, keepbits: int): return da -def _get_keepbits(bitround: Union[bool, int, dict], varname: str, vartype): +def _get_keepbits(bitround: bool | int | dict, varname: str, vartype): # Guess the number of bits to keep depending on how bitround was passed, the var dtype and the var name. if not np.issubdtype(vartype, np.floating) or bitround is False: if isinstance(bitround, dict) and varname in bitround: @@ -347,12 +353,12 @@ def _get_keepbits(bitround: Union[bool, int, dict], varname: str, vartype): @parse_config def save_to_netcdf( ds: xr.Dataset, - filename: Union[str, os.PathLike], + filename: str | os.PathLike, *, - rechunk: Optional[dict] = None, - bitround: Union[bool, int, dict] = False, + rechunk: dict | None = None, + bitround: bool | int | dict = False, compute: bool = True, - netcdf_kwargs: Optional[dict] = None, + netcdf_kwargs: dict | None = None, ): """Save a Dataset to NetCDF, rechunking or compressing if requested. @@ -414,56 +420,57 @@ def save_to_netcdf( @parse_config def save_to_zarr( # noqa: C901 ds: xr.Dataset, - filename: Union[str, os.PathLike], + filename: str | os.PathLike, *, - rechunk: Optional[dict] = None, - zarr_kwargs: Optional[dict] = None, + rechunk: dict | None = None, + zarr_kwargs: dict | None = None, compute: bool = True, - encoding: Optional[dict] = None, - bitround: Union[bool, int, dict] = False, + encoding: dict | None = None, + bitround: bool | int | dict = False, mode: str = "f", itervar: bool = False, timeout_cleanup: bool = True, ): - """Save a Dataset to Zarr format, rechunking and compressing if requested. + """ + Save a Dataset to Zarr format, rechunking and compressing if requested. According to mode, removes variables that we don't want to re-compute in ds. Parameters ---------- ds : xr.Dataset - Dataset to be saved. + Dataset to be saved. filename : str - Name of the Zarr file to be saved. + Name of the Zarr file to be saved. rechunk : dict, optional - This is a mapping from dimension name to new chunks (in any format understood by dask). - Spatial dimensions can be generalized as 'X' and 'Y' which will be mapped to the actual grid type's - dimension names. - Rechunking is only done on *data* variables sharing dimensions with this argument. + This is a mapping from dimension name to new chunks (in any format understood by dask). + Spatial dimensions can be generalized as 'X' and 'Y' which will be mapped to the actual grid type's + dimension names. + Rechunking is only done on *data* variables sharing dimensions with this argument. zarr_kwargs : dict, optional - Additional arguments to send to_zarr() + Additional arguments to send to_zarr() compute : bool - Whether to start the computation or return a delayed object. 
+ Whether to start the computation or return a delayed object. mode : {'f', 'o', 'a'} - If 'f', fails if any variable already exists. - if 'o', removes the existing variables. - if 'a', skip existing variables, writes the others. + If 'f', fails if any variable already exists. + if 'o', removes the existing variables. + if 'a', skip existing variables, writes the others. encoding : dict, optional - If given, skipped variables are popped in place. + If given, skipped variables are popped in place. bitround : bool or int or dict - If not False, float variables are bit-rounded by dropping a certain number of bits from their mantissa, - allowing for a much better compression. - If an int, this is the number of bits to keep for all float variables. - If a dict, a mapping from variable name to the number of bits to keep. - If True, the number of bits to keep is guessed based on the variable's name, defaulting to 12, - which yields a relative error of 0.012%. + If not False, float variables are bit-rounded by dropping a certain number of bits from their mantissa, + allowing for a much better compression. + If an int, this is the number of bits to keep for all float variables. + If a dict, a mapping from variable name to the number of bits to keep. + If True, the number of bits to keep is guessed based on the variable's name, defaulting to 12, + which yields a relative error of 0.012%. itervar : bool - If True, (data) variables are written one at a time, appending to the zarr. - If False, this function computes, no matter what was passed to kwargs. + If True, (data) variables are written one at a time, appending to the zarr. + If False, this function computes, no matter what was passed to kwargs. timeout_cleanup : bool - If True (default) and a :py:class:`xscen.scripting.TimeoutException` is raised during the writing, - the variable being written is removed from the dataset as it is incomplete. - This does nothing if `compute` is False. + If True (default) and a :py:class:`xscen.scripting.TimeoutException` is raised during the writing, + the variable being written is removed from the dataset as it is incomplete. + This does nothing if `compute` is False. Returns ------- @@ -504,7 +511,8 @@ def _skip(var): if mode == "o": if exists: var_path = path / var - logger.warning(f"Removing {var_path} to overwrite.") + msg = f"Removing {var_path} to overwrite." + logger.warning(msg) sh.rmtree(var_path) return False @@ -515,7 +523,8 @@ def _skip(var): for var in list(ds.data_vars.keys()): if _skip(var): - logger.info(f"Skipping {var} in {path}.") + msg = f"Skipping {var} in {path}." + logger.info(msg) ds = ds.drop_vars(var) if encoding: encoding.pop(var) @@ -541,7 +550,8 @@ def _skip(var): dsbase = ds.drop_vars(allvars) dsbase.to_zarr(path, **zarr_kwargs, mode="w") for i, (name, var) in enumerate(ds.data_vars.items()): - logger.debug(f"Writing {name} ({i + 1} of {len(ds.data_vars)}) to {path}") + msg = f"Writing {name} ({i + 1} of {len(ds.data_vars)}) to {path}" + logger.debug(msg) dsvar = ds.drop_vars(allvars - {name}) try: dsvar.to_zarr( @@ -552,21 +562,22 @@ def _skip(var): ) except TimeoutException: if timeout_cleanup: - logger.info(f"Removing incomplete {name}.") + msg = f"Removing incomplete {name}." + logger.info(msg) sh.rmtree(path / name) raise else: - logger.debug(f"Writing {list(ds.data_vars.keys())} for {filename}.") + msg = f"Writing {list(ds.data_vars.keys())} for {filename}." 
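# Usage sketch for save_to_zarr with the options documented above. Illustrative only:
# `ds` stands for a previously prepared xarray.Dataset and the output path is hypothetical.
import xscen as xs

xs.save_to_zarr(
    ds,
    "out/dataset.zarr",
    rechunk={"time": 365, "X": 50, "Y": 50},  # 'X'/'Y' map to the grid's spatial dims
    bitround=12,   # keep 12 mantissa bits on float variables
    mode="o",      # overwrite variables already present in the store
    itervar=True,  # write one data variable at a time
)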
+ logger.debug(msg) try: return ds.to_zarr( filename, compute=compute, mode="a", encoding=encoding, **zarr_kwargs ) except TimeoutException: if timeout_cleanup: - logger.info( - f"Removing incomplete {list(ds.data_vars.keys())} for {filename}." - ) + msg = f"Removing incomplete {list(ds.data_vars.keys())} for {filename}." + logger.info(msg) for name in ds.data_vars: sh.rmtree(path / name) raise @@ -625,13 +636,13 @@ def _to_dataframe( def to_table( - ds: Union[xr.Dataset, xr.DataArray], + ds: xr.Dataset | xr.DataArray, *, - row: Optional[Union[str, Sequence[str]]] = None, - column: Optional[Union[str, Sequence[str]]] = None, - sheet: Optional[Union[str, Sequence[str]]] = None, - coords: Union[bool, str, Sequence[str]] = True, -) -> Union[pd.DataFrame, dict]: + row: str | Sequence[str] | None = None, + column: str | Sequence[str] | None = None, + sheet: str | Sequence[str] | None = None, + coords: bool | str | Sequence[str] = True, +) -> pd.DataFrame | dict: """Convert a dataset to a pandas DataFrame with support for multicolumns and multisheet. This function will trigger a computation of the dataset. @@ -664,7 +675,11 @@ def to_table( if isinstance(ds, xr.Dataset): da = ds.to_array(name="data") if len(ds) == 1: - da = da.isel(variable=0).rename(data=da.variable.values[0]) + da = da.isel(variable=0) + da.name = str(da["variable"].values) + da = da.drop_vars("variable") + else: + da = ds def _ensure_list(seq): if isinstance(seq, str): @@ -678,7 +693,13 @@ def _ensure_list(seq): row = [d for d in da.dims if d != "variable" and d not in passed_dims] row = _ensure_list(row) if column is None: - column = ["variable"] if len(ds) > 1 and "variable" not in passed_dims else [] + column = ( + ["variable"] + if isinstance(ds, xr.Dataset) + and len(ds) > 1 + and "variable" not in passed_dims + else [] + ) column = _ensure_list(column) if sheet is None: sheet = [] @@ -697,10 +718,10 @@ def _ensure_list(seq): if coords is not True: coords = _ensure_list(coords or []) - drop = set(ds.coords.keys()) - set(da.dims) - set(coords) + drop = set(da.coords.keys()) - set(da.dims) - set(coords) da = da.drop_vars(drop) else: - coords = list(set(ds.coords.keys()) - set(da.dims)) + coords = list(set(da.coords.keys()) - set(da.dims)) if len(coords) > 1 and ("variable" in row or "variable" in sheet): raise NotImplementedError( "Keeping auxiliary coords is not implemented when 'variable' is in the row or in the sheets." @@ -724,9 +745,7 @@ def _ensure_list(seq): return _to_dataframe(da, **table_kwargs) -def make_toc( - ds: Union[xr.Dataset, xr.DataArray], loc: Optional[str] = None -) -> pd.DataFrame: +def make_toc(ds: xr.Dataset | xr.DataArray, loc: str | None = None) -> pd.DataFrame: """Make a table of content describing a dataset's variables. This return a simple DataFrame with variable names as index, the long_name as "description" and units. 
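# Usage sketch for save_to_table, which builds on to_table above. Illustrative only:
# `ds_ind` and its dimensions ("station", "season", "horizon") are assumptions about
# the dataset's layout.
from xscen.io import save_to_table

save_to_table(
    ds_ind,
    "tables/indicators.xlsx",
    row="station",
    column=["variable", "season"],
    sheet="horizon",
    add_toc=True,  # also write the table of content produced by make_toc
)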
@@ -765,6 +784,26 @@ def make_toc( ], ).set_index(_("Variable")) toc.attrs["name"] = _("Content") + + # Add global attributes by using a fake variable and description + if len(ds.attrs) > 0: + globattr = pd.DataFrame.from_records( + [ + { + _("Variable"): vv, + _("Description"): da, + _("Units"): "", + } + for vv, da in ds.attrs.items() + ], + ).set_index(_("Variable")) + globattr.attrs["name"] = _("Global attributes") + + # Empty row to separate global attributes from variables + toc = pd.concat([toc, pd.DataFrame(index=[""])]) + toc = pd.concat([toc, pd.DataFrame(index=[_("Global attributes")])]) + toc = pd.concat([toc, globattr]) + return toc @@ -772,17 +811,17 @@ def make_toc( def save_to_table( - ds: Union[xr.Dataset, xr.DataArray], - filename: Union[str, os.PathLike], - output_format: Optional[str] = None, + ds: xr.Dataset | xr.DataArray, + filename: str | os.PathLike, + output_format: str | None = None, *, - row: Optional[Union[str, Sequence[str]]] = None, - column: Union[None, str, Sequence[str]] = "variable", - sheet: Optional[Union[str, Sequence[str]]] = None, - coords: Union[bool, Sequence[str]] = True, + row: str | Sequence[str] | None = None, + column: None | str | Sequence[str] = "variable", + sheet: str | Sequence[str] | None = None, + coords: bool | Sequence[str] = True, col_sep: str = "_", - row_sep: Optional[str] = None, - add_toc: Union[bool, pd.DataFrame] = False, + row_sep: str | None = None, + add_toc: bool | pd.DataFrame = False, **kwargs, ): """Save the dataset to a tabular file (csv, excel, ...). @@ -920,13 +959,13 @@ def rechunk_for_saving(ds: xr.Dataset, rechunk: dict): @parse_config def rechunk( - path_in: Union[os.PathLike, str, xr.Dataset], - path_out: Union[os.PathLike, str], + path_in: os.PathLike | str | xr.Dataset, + path_out: os.PathLike | str, *, - chunks_over_var: Optional[dict] = None, - chunks_over_dim: Optional[dict] = None, + chunks_over_var: dict | None = None, + chunks_over_dim: dict | None = None, worker_mem: str, - temp_store: Optional[Union[os.PathLike, str]] = None, + temp_store: os.PathLike | str | None = None, overwrite: bool = False, ) -> None: """Rechunk a dataset into a new zarr. @@ -976,7 +1015,7 @@ def rechunk( elif chunks_over_dim: chunks = {v: {d: chunks_over_dim[d] for d in ds[v].dims} for v in variables} chunks.update(time=None, lat=None, lon=None) - cal = get_calendar(ds) + cal = ds.time.dt.calendar Nt = ds.time.size chunks = translate_time_chunk(chunks, cal, Nt) else: @@ -991,3 +1030,60 @@ def rechunk( if temp_store is not None: sh.rmtree(temp_store) + + +def zip_directory( + root: str | os.PathLike, + zipfile: str | os.PathLike, + delete: bool = False, + **zip_args, +): + r"""Make a zip archive of the content of a directory. + + Parameters + ---------- + root : path + The directory with the content to archive. + zipfile : path + The zip file to create. + delete : bool + If True, the original directory is deleted after zipping. + \*\*zip_args + Any other arguments to pass to :py:mod:`zipfile.ZipFile`, such as "compression". + The default is to make no compression (``compression=ZIP_STORED``). 
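# Usage sketch for the new archiving helpers (zip_directory / unzip_directory).
# Illustrative only: the paths are hypothetical and compression is optional
# (the default stores files uncompressed).
from zipfile import ZIP_DEFLATED
from xscen.io import unzip_directory, zip_directory

zip_directory("out/dataset.zarr", "out/dataset.zarr.zip", compression=ZIP_DEFLATED)
unzip_directory("out/dataset.zarr.zip", "restored/dataset.zarr")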
+ """ + root = Path(root) + + def _add_to_zip(zf, path, root): + zf.write(path, path.relative_to(root)) + if path.is_dir(): + for subpath in path.iterdir(): + _add_to_zip(zf, subpath, root) + + with ZipFile(zipfile, "w", **zip_args) as zf: + for file in root.iterdir(): + _add_to_zip(zf, file, root) + + if delete: + sh.rmtree(root) + + +def unzip_directory(zipfile: str | os.PathLike, root: str | os.PathLike): + r"""Unzip an archive to a directory. + + This function is the exact opposite of :py:func:`xscen.io.zip_directory`. + + Parameters + ---------- + zipfile : path + The zip file to read. + root : path + The directory where to put the content to archive. + If doesn't exist, it will be created (and all its parents). + If it exists, should be empty. + """ + root = Path(root) + root.mkdir(parents=True, exist_ok=True) + + with ZipFile(zipfile, "r") as zf: + zf.extractall(root) diff --git a/xscen/reduce.py b/src/xscen/reduce.py similarity index 91% rename from xscen/reduce.py rename to src/xscen/reduce.py index b7310769..5dd4d835 100644 --- a/xscen/reduce.py +++ b/src/xscen/reduce.py @@ -1,7 +1,6 @@ """Functions to reduce an ensemble of simulations.""" import warnings -from typing import Optional, Union import numpy as np import xarray as xr @@ -12,10 +11,10 @@ @parse_config def build_reduction_data( - datasets: Union[dict, list[xr.Dataset]], + datasets: dict | list[xr.Dataset], *, - xrfreqs: Optional[list[str]] = None, - horizons: Optional[list[str]] = None, + xrfreqs: list[str] | None = None, + horizons: list[str] | None = None, ) -> xr.DataArray: """Construct the input required for ensemble reduction. @@ -37,7 +36,7 @@ def build_reduction_data( 2D DataArray of dimensions "realization" and "criteria", to be used as input for ensemble reduction. """ warnings.warn( - "This function will be dropped in a future version, as it is now redundant with xclim.ensembles.make_criteria." + "This function will be dropped in v0.11.0, as it is now redundant with xclim.ensembles.make_criteria." "Either use xclim.ensembles.make_criteria directly (preceded by xclim.ensembles.create_ensemble if needed) or " "use xscen's reduce_ensemble function to build the criteria and reduce the ensemble in one step.", FutureWarning, @@ -84,11 +83,11 @@ def build_reduction_data( @parse_config def reduce_ensemble( - data: Union[xr.DataArray, dict, list, xr.Dataset], + data: xr.DataArray | dict | list | xr.Dataset, method: str, *, - horizons: Optional[list[str]] = None, - create_kwargs: Optional[dict] = None, + horizons: list[str] | None = None, + create_kwargs: dict | None = None, **kwargs, ): r"""Reduce an ensemble of simulations using clustering algorithms from xclim.ensembles. @@ -128,7 +127,7 @@ def reduce_ensemble( You can use py:func:`xscen.utils.unstack_dates` on seasonal or monthly indicators to this end. """ warnings.warn( - "This function has been moved to xscen.ensembles.reduce_ensemble. This version will be dropped in a future release.", + "This function has been moved to xscen.ensembles.reduce_ensemble. 
This version will be dropped in v0.11.0.", FutureWarning, ) return reduce_ensemble( @@ -140,7 +139,7 @@ def reduce_ensemble( ) -def _concat_criteria(criteria: Optional[xr.DataArray], ens: xr.Dataset): +def _concat_criteria(criteria: xr.DataArray | None, ens: xr.Dataset): """Combine all variables and dimensions excepting 'realization'.""" if criteria is None: i = 0 diff --git a/src/xscen/regrid.py b/src/xscen/regrid.py new file mode 100644 index 00000000..66b8871b --- /dev/null +++ b/src/xscen/regrid.py @@ -0,0 +1,481 @@ +"""Functions to regrid datasets.""" + +import datetime +import operator +import os +import random +import string +import warnings +from copy import deepcopy +from pathlib import Path + +import cartopy.crs as ccrs +import cf_xarray as cfxr +import numpy as np +import xarray as xr +from xclim.core.units import convert_units_to + +try: + import xesmf as xe + from xesmf.frontend import Regridder +except ImportError: + xe = None + Regridder = "xesmf.Regridder" + +from .config import parse_config + +__all__ = ["create_bounds_gridmapping", "create_mask", "regrid_dataset"] + + +@parse_config +def regrid_dataset( # noqa: C901 + ds: xr.Dataset, + ds_grid: xr.Dataset, + *, + weights_location: str | os.PathLike | None = None, + regridder_kwargs: dict | None = None, + intermediate_grids: dict | None = None, + to_level: str = "regridded", +) -> xr.Dataset: + """Regrid a dataset according to weights and a reference grid. + + Based on an intake_esm catalog, this function performs regridding on Zarr files. + + Parameters + ---------- + ds : xarray.Dataset + Dataset to regrid. The Dataset needs to have lat/lon coordinates. + Supports a 'mask' variable compatible with ESMF standards. + ds_grid : xr.Dataset + Destination grid. The Dataset needs to have lat/lon coordinates. + Supports a 'mask' variable compatible with ESMF standards. + weights_location : Union[str, os.PathLike], optional + Path to the folder where weight file is saved. Leave as None to force re-computation of weights. + Note that in order to reuse the weights, ds and ds_grid should both have the 'cat:id' and 'cat:domain' attributes. + regridder_kwargs : dict, optional + Arguments to send xe.Regridder(). If it contains `skipna` or `output_chunks`, those + are passed to the regridder call directly. + intermediate_grids : dict, optional + This argument is used to do a regridding in many steps, regridding to regular + grids before regridding to the final ds_grid. + This is useful when there is a large jump in resolution between ds and ds grid. + The format is a nested dictionary shown in Notes. + If None, no intermediary grid is used, there is only a regrid from ds to ds_grid. + to_level : str + The processing level to assign to the output. + Defaults to 'regridded' + + Returns + ------- + xarray.Dataset + Regridded dataset + + Notes + ----- + intermediate_grids = + {'name_of_inter_grid_1': {'cf_grid_2d': {arguments for util.cf_grid_2d },'regridder_kwargs':{arguments for xe.Regridder}}, + 'name_of_inter_grid_2': dictionary_as_above} + + See Also + -------- + xesmf.regridder, xesmf.util.cf_grid_2d + """ + if xe is None: + raise ImportError( + "xscen's regridding functionality requires xESMF to work, please install that package." 
+ ) + ds = ds.copy() + regridder_kwargs = regridder_kwargs or {} + + # We modify the dataset later, so we need to keep track of whether it had lon_bounds and lat_bounds to begin with + has_lon_bounds = "lon_bounds" in ds + has_lat_bounds = "lat_bounds" in ds + + # Generate unique IDs to name the weights file, but remove the member and experiment from the dataset ID + if weights_location is not None: + dsid = ( + ds.attrs.get("cat:id", _generate_random_string(15)) + .replace(ds.attrs.get("cat:member", ""), "") + .replace(ds.attrs.get("cat:experiment", ""), "") + ) + dsid = f"{dsid}_{ds.attrs.get('cat:domain', _generate_random_string(15))}" + gridid = f"{ds_grid.attrs.get('cat:id', _generate_random_string(15))}_{ds_grid.attrs.get('cat:domain', _generate_random_string(15))}" + + ds_grids = [] # List of target grids + reg_arguments = [] # List of accompanying arguments for xe.Regridder() + if intermediate_grids: + for name_inter, dict_inter in intermediate_grids.items(): + reg_arguments.append(dict_inter["regridder_kwargs"]) + ds_grids.append(xe.util.cf_grid_2d(**dict_inter["cf_grid_2d"])) + + ds_grids.append(ds_grid) # Add the final ds_grid + reg_arguments.append(regridder_kwargs) # Add the final regridder_kwargs + + out = None + + # If the grid is the same, skip the call to xESMF + if ds["lon"].equals(ds_grid["lon"]) & ds["lat"].equals(ds_grid["lat"]): + out = ds + if "mask" in out: + out = out.where(out.mask == 1) + out = out.drop_vars(["mask"]) + if "mask" in ds_grid: + out = out.where(ds_grid.mask == 1) + + else: + for i, (ds_grid, regridder_kwargs) in enumerate(zip(ds_grids, reg_arguments)): + # If this is not the first iteration (out != None), + # get the result from the last iteration (out) as input + ds = out or ds + kwargs = deepcopy(regridder_kwargs) + + # Prepare the weight file + if weights_location is not None: + Path(weights_location).mkdir(parents=True, exist_ok=True) + weights_filename = Path( + weights_location, + f"{dsid}_{gridid}_regrid{i}" + f"{'_'.join(kwargs[k] for k in kwargs if isinstance(kwargs[k], str))}.nc", + ) + + # Re-use existing weight file if possible + if Path(weights_filename).is_file() and not ( + ("reuse_weights" in kwargs) and (kwargs["reuse_weights"] is False) + ): + kwargs["weights"] = weights_filename + kwargs["reuse_weights"] = True + else: + weights_filename = None + + # Extract args that are to be given at call time. + # output_chunks is only valid for xesmf >= 0.8, so don't add it be default to the call_kwargs + call_kwargs = {"skipna": kwargs.pop("skipna", False)} + if "output_chunks" in kwargs: + call_kwargs["output_chunks"] = kwargs.pop("output_chunks") + + regridder = _regridder( + ds_in=ds, ds_grid=ds_grid, filename=weights_filename, **kwargs + ) + + # The regridder (when fed Datasets) doesn't like if 'mask' is present. 
+ if "mask" in ds: + ds = ds.drop_vars(["mask"]) + + out = regridder(ds, keep_attrs=True, **call_kwargs) + + # Double-check that grid_mapping information is transferred + gridmap_out = _get_grid_mapping(ds_grid) + if gridmap_out: + # Regridder seems to seriously mess up the rotated dimensions + for d in out.lon.dims: + out[d] = ds_grid[d] + if d not in out.coords: + out = out.assign_coords({d: ds_grid[d]}) + # Add the grid_mapping attribute + for v in out.data_vars: + if any( + d in out[v].dims + for d in [out.cf.axes["X"][0], out.cf.axes["Y"][0]] + ): + out[v].attrs["grid_mapping"] = gridmap_out + # Add the grid_mapping coordinate + if gridmap_out not in out: + out = out.assign_coords({gridmap_out: ds_grid[gridmap_out]}) + else: + gridmap_in = _get_grid_mapping(ds) + # Remove the original grid_mapping attribute + for v in out.data_vars: + if "grid_mapping" in out[v].attrs: + out[v].attrs.pop("grid_mapping") + # Remove the original grid_mapping coordinate if it is still in the output + out = out.drop_vars(gridmap_in, errors="ignore") + + # cf_grid_2d adds temporary variables that we don't want to keep + if "lon_bounds" in out and has_lon_bounds is False: + out = out.drop_vars("lon_bounds") + if "lat_bounds" in out and has_lat_bounds is False: + out = out.drop_vars("lat_bounds") + + # History + kwargs_for_hist = deepcopy(regridder_kwargs) + kwargs_for_hist.setdefault("method", regridder.method) + if intermediate_grids and i < len(intermediate_grids): + name_inter = list(intermediate_grids.keys())[i] + cf_grid_2d_args = intermediate_grids[name_inter]["cf_grid_2d"] + new_history = ( + f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " + f"regridded with regridder arguments {kwargs_for_hist} to a xesmf" + f" cf_grid_2d with arguments {cf_grid_2d_args} - xESMF v{xe.__version__}" + ) + else: + new_history = ( + f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " + f"regridded with arguments {kwargs_for_hist} - xESMF v{xe.__version__}" + ) + history = ( + f"{new_history}\n{out.attrs['history']}" + if "history" in out.attrs + else new_history + ) + out.attrs["history"] = history + + out = out.drop_vars("latitude_longitude", errors="ignore") + # Attrs + out.attrs["cat:processing_level"] = to_level + out.attrs["cat:domain"] = ( + ds_grid.attrs["cat:domain"] if "cat:domain" in ds_grid.attrs else None + ) + return out + + +@parse_config +def create_mask( + ds: xr.Dataset | xr.DataArray, + *, + variable: str | None = None, + where_operator: str | None = None, + where_threshold: float | str | None = None, + mask_nans: bool = True, +) -> xr.DataArray: + """Create a 0-1 mask based on incoming arguments. + + Parameters + ---------- + ds : xr.Dataset or xr.DataArray + Dataset or DataArray to be evaluated. If a time dimension is present, the first time step will be used. + variable : str, optional + If using a Dataset, the variable on which to base the mask. + where_operator : str, optional + Operator to use for the threshold comparison. One of "<", "<=", "==", "!=", ">=", ">". + Needs to be used with `where_threshold`. + where_threshold : float or str, optional + Threshold value to use for the comparison. A string can be used to reference units, e.g. "10 mm/day". + Needs to be used with `where_operator`. + mask_nans : bool, optional + Whether to mask NaN values in the mask array. Default is True. + + Returns + ------- + xr.DataArray + Mask array. 
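# Usage sketch combining create_mask with regrid_dataset. Illustrative only: `ds` and
# `ds_grid` are assumed datasets, "sftlf" is an assumed land-fraction variable on a
# 0-1 scale, and the weights folder is hypothetical.
from xscen.regrid import create_mask, regrid_dataset

ds_grid["mask"] = create_mask(
    ds_grid, variable="sftlf", where_operator=">", where_threshold=0.5
)
out = regrid_dataset(
    ds, ds_grid, weights_location="weights/", regridder_kwargs={"method": "bilinear"}
)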
+ """ + # Prepare the mask for the destination grid + ops = { + "<": operator.lt, + "<=": operator.le, + "==": operator.eq, + "!=": operator.ne, + ">=": operator.ge, + ">": operator.gt, + } + + def cmp(arg1, op, arg2): + operation = ops.get(op) + return operation(arg1, arg2) + + if isinstance(ds, xr.Dataset): + if variable is None: + raise ValueError("A variable needs to be specified when passing a Dataset.") + ds = ds[variable].copy() + else: + ds = ds.copy() + if "time" in ds.dims: + ds = ds.isel(time=0) + + mask = xr.ones_like(ds) + mask.attrs = {"long_name": "Mask"} + mask.name = "mask" + + # Create the mask based on the threshold + if (where_operator is not None and where_threshold is None) or ( + where_operator is None and where_threshold is not None + ): + raise ValueError( + "'where_operator' and 'where_threshold' must be used together." + ) + if where_threshold is not None: + mask.attrs["where_threshold"] = f"{variable} {where_operator} {where_threshold}" + if isinstance(where_threshold, str): + ds = convert_units_to(ds, where_threshold.split(" ")[1]) + where_threshold = float(where_threshold.split(" ")[0]) + + mask = xr.where( + cmp(ds, where_operator, where_threshold), mask, 0, keep_attrs=True + ) + + # Mask NaNs + if mask_nans: + mask = xr.where(ds.notnull(), mask, 0, keep_attrs=True) + mask.attrs["mask_NaNs"] = "True" + else: + # The where clause above will mask NaNs, so we need to revert that + attrs = mask.attrs + mask = xr.where(ds.isnull(), 1, mask) + mask.attrs = attrs + mask.attrs["mask_NaNs"] = "False" + + return mask + + +def _regridder( + ds_in: xr.Dataset, + ds_grid: xr.Dataset, + *, + filename: str | os.PathLike | None = None, + method: str = "bilinear", + unmapped_to_nan: bool | None = True, + **kwargs, +) -> Regridder: + """Call to xesmf Regridder with a few default arguments. + + Parameters + ---------- + ds_in : xr.Dataset + Incoming grid. The Dataset needs to have lat/lon coordinates. + ds_grid : xr.Dataset + Destination grid. The Dataset needs to have lat/lon coordinates. + filename : str or os.PathLike, optional + Path to the NetCDF file with weights information. + method : str + Interpolation method. + unmapped_to_nan : bool, optional + Arguments to send xe.Regridder(). + regridder_kwargs : dict + Arguments to send xe.Regridder(). + + Returns + ------- + xe.frontend.Regridder + Regridder object + """ + if method.startswith("conservative"): + gridmap_in = _get_grid_mapping(ds_in) + gridmap_grid = _get_grid_mapping(ds_grid) + + if ( + ds_in.cf["longitude"].ndim == 2 + and "longitude" not in ds_in.cf.bounds + and gridmap_in in ds_in + ): + ds_in = ds_in.update(create_bounds_gridmapping(ds_in, gridmap_in)) + if ( + ds_grid.cf["longitude"].ndim == 2 + and "longitude" not in ds_grid.cf.bounds + and gridmap_grid in ds_grid + ): + ds_grid = ds_grid.update(create_bounds_gridmapping(ds_grid, gridmap_grid)) + + regridder = xe.Regridder( + ds_in=ds_in, + ds_out=ds_grid, + method=method, + unmapped_to_nan=unmapped_to_nan, + **kwargs, + ) + if filename is not None and not Path(filename).is_file(): + regridder.to_netcdf(filename) + + return regridder + + +def create_bounds_rotated_pole(ds: xr.Dataset) -> xr.Dataset: + warnings.warn( + "This function is deprecated and will be removed in xscen v0.12.0. 
Use create_bounds_gridmapping instead.", + FutureWarning, + ) + return create_bounds_gridmapping(ds, "rotated_pole") + + +def create_bounds_gridmapping(ds: xr.Dataset, gridmap: str) -> xr.Dataset: + """Create bounds for rotated pole datasets.""" + xname = ds.cf.axes["X"][0] + yname = ds.cf.axes["Y"][0] + + ds = ds.cf.add_bounds([yname, xname]) + + # In "vertices" format then expand to 2D. From (N, 2) to (N+1,) to (N+1, M+1) + yv1D = cfxr.bounds_to_vertices(ds[f"{yname}_bounds"], "bounds") + xv1D = cfxr.bounds_to_vertices(ds[f"{xname}_bounds"], "bounds") + yv = yv1D.expand_dims(dict([(f"{xname}_vertices", xv1D)])).transpose( + f"{xname}_vertices", f"{yname}_vertices" + ) + xv = xv1D.expand_dims(dict([(f"{yname}_vertices", yv1D)])).transpose( + f"{xname}_vertices", f"{yname}_vertices" + ) + + # Some CRS have additional attributes according to CF conventions + def _get_opt_attr_as_float(da: xr.DataArray, attr: str) -> float | None: + return float(da.attrs[attr]) if attr in da.attrs else None + + if gridmap == "rotated_pole": + # Get cartopy's crs for the projection + RP = ccrs.RotatedPole( + pole_longitude=float(ds.rotated_pole.grid_north_pole_longitude), + pole_latitude=float(ds.rotated_pole.grid_north_pole_latitude), + central_rotated_longitude=_get_opt_attr_as_float( + ds.rotated_pole, "north_pole_grid_longitude" + ), + ) + elif gridmap == "oblique_mercator": + RP = ccrs.ObliqueMercator( + central_longitude=float(ds.oblique_mercator.longitude_of_projection_origin), + central_latitude=float(ds.oblique_mercator.latitude_of_projection_origin), + false_easting=_get_opt_attr_as_float(ds.oblique_mercator, "false_easting"), + false_northing=_get_opt_attr_as_float( + ds.oblique_mercator, "false_northing" + ), + scale_factor=float(ds.oblique_mercator.scale_factor_at_projection_origin), + azimuth=float(ds.oblique_mercator.azimuth_of_central_line), + ) + else: + raise NotImplementedError(f"Grid mapping {gridmap} not yet implemented.") + + PC = ccrs.PlateCarree() + + # Project points + pts = PC.transform_points(RP, xv.values, yv.values) + lonv = xv.copy(data=pts[..., 0]).rename("lon_vertices") + latv = yv.copy(data=pts[..., 1]).rename("lat_vertices") + + # Back to CF bounds format. From (N+1, M+1) to (4, N, M) + lonb = cfxr.vertices_to_bounds(lonv, ("bounds", xname, yname)).rename("lon_bounds") + latb = cfxr.vertices_to_bounds(latv, ("bounds", xname, yname)).rename("lat_bounds") + + # Create dataset, set coords and attrs + ds_bnds = xr.merge([lonb, latb]).assign( + dict([("lon", ds.lon), ("lat", ds.lat), (gridmap, ds[gridmap])]) + ) + ds_bnds[yname] = ds[yname] + ds_bnds[xname] = ds[xname] + ds_bnds.lat.attrs["bounds"] = "lat_bounds" + ds_bnds.lon.attrs["bounds"] = "lon_bounds" + return ds_bnds.transpose(*ds.lon.dims, "bounds") + + +def _get_grid_mapping(ds: xr.Dataset) -> str: + """Get the grid_mapping attribute from the dataset.""" + gridmap = [ + ds[v].attrs["grid_mapping"] + for v in ds.data_vars + if "grid_mapping" in ds[v].attrs + ] + gridmap += [c for c in ds.coords if ds[c].attrs.get("grid_mapping_name", None)] + gridmap = list(np.unique(gridmap)) + + if len(gridmap) > 1: + warnings.warn( + f"There are conflicting grid_mapping attributes in the dataset. Assuming {gridmap[0]}." 
+ ) + + return gridmap[0] if gridmap else "" + + +def _generate_random_string(length: int): + characters = string.ascii_letters + string.digits + + # Random seed based on the current time + random.seed(datetime.datetime.now().timestamp()) + random_string = "".join( + random.choice(characters) for i in range(length) # noqa: S311 + ) + return random_string diff --git a/xscen/scripting.py b/src/xscen/scripting.py similarity index 91% rename from xscen/scripting.py rename to src/xscen/scripting.py index 7ec0b86c..68e3677b 100644 --- a/xscen/scripting.py +++ b/src/xscen/scripting.py @@ -15,7 +15,6 @@ from io import BytesIO from pathlib import Path from traceback import format_exception -from typing import Optional, Union import xarray as xr from matplotlib.figure import Figure @@ -43,12 +42,12 @@ def send_mail( *, subject: str, msg: str, - to: Optional[str] = None, + to: str | None = None, server: str = "127.0.0.1", port: int = 25, - attachments: Optional[ - list[Union[tuple[str, Union[Figure, os.PathLike]], Figure, os.PathLike]] - ] = None, + attachments: None | ( + list[tuple[str, Figure | os.PathLike] | Figure | os.PathLike] + ) = None, ) -> None: """Send email. @@ -160,9 +159,9 @@ def err_handler(self, *exc_info): @parse_config def send_mail_on_exit( *, - subject: Optional[str] = None, - msg_ok: Optional[str] = None, - msg_err: Optional[str] = None, + subject: str | None = None, + msg_ok: str | None = None, + msg_err: str | None = None, on_error_only: bool = False, skip_ctrlc: bool = True, **mail_kwargs, @@ -244,7 +243,7 @@ class measure_time: def __init__( self, - name: Optional[str] = None, + name: str | None = None, cpu: bool = False, logger: logging.Logger = logger, ): @@ -255,7 +254,8 @@ def __init__( def __enter__(self): # noqa: D105 self.start = time.perf_counter() self.start_cpu = time.process_time() - self.logger.info(f"Started process {self.name}.") + msg = f"Started process {self.name}." + self.logger.info(msg) return def __exit__(self, *args, **kwargs): # noqa: D105 @@ -269,7 +269,8 @@ def __exit__(self, *args, **kwargs): # noqa: D105 self.logger.info(s) -class TimeoutException(Exception): +# FIXME: This should be written as "TimeoutError" +class TimeoutException(Exception): # noqa: N818 """An exception raised with a timeout occurs.""" def __init__(self, seconds: int, task: str = "", **kwargs): @@ -309,9 +310,7 @@ def _timeout_handler(signum, frame): @contextmanager -def skippable( - seconds: int = 2, task: str = "", logger: Optional[logging.Logger] = None -): +def skippable(seconds: int = 2, task: str = "", logger: logging.Logger | None = None): """Skippable context manager. When CTRL-C (SIGINT, KeyboardInterrupt) is sent within the context, @@ -353,11 +352,11 @@ def skippable( def save_and_update( ds: xr.Dataset, pcat: ProjectCatalog, - path: Optional[Union[str, os.PathLike]] = None, - file_format: Optional[str] = None, - build_path_kwargs: Optional[dict] = None, - save_kwargs: Optional[dict] = None, - update_kwargs: Optional[dict] = None, + path: str | os.PathLike | None = None, + file_format: str | None = None, + build_path_kwargs: dict | None = None, + save_kwargs: dict | None = None, + update_kwargs: dict | None = None, ): """ Construct the path, save and delete. @@ -424,13 +423,14 @@ def save_and_update( # update catalog pcat.update_from_ds(ds=ds, path=path, **update_kwargs) - logger.info(f"File {path} has saved succesfully and the catalog was updated.") + msg = f"File {path} has been saved successfully and the catalog was updated." 
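# Usage sketch for the measure_time context manager (self-contained): it logs the
# start of the block, then the elapsed wall-clock (and, here, CPU) time on exit.
import logging
from xscen.scripting import measure_time

logging.basicConfig(level=logging.INFO)

with measure_time("dummy workload", cpu=True, logger=logging.getLogger("workflow")):
    sum(i * i for i in range(10_000_000))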
+ logger.info(msg) def move_and_delete( - moving: list[list[Union[str, os.PathLike]]], + moving: list[list[str | os.PathLike]], pcat: ProjectCatalog, - deleting: Optional[list[Union[str, os.PathLike]]] = None, + deleting: list[str | os.PathLike] | None = None, copy: bool = False, ): """ @@ -456,7 +456,8 @@ def move_and_delete( source, dest = files[0], files[1] if Path(source).exists(): if copy: - logger.info(f"Copying {source} to {dest}.") + msg = f"Copying {source} to {dest}." + logger.info(msg) copied_files = copy_tree(source, dest) for f in copied_files: # copied files don't include zarr files @@ -467,13 +468,15 @@ def move_and_delete( ds = xr.open_dataset(f) pcat.update_from_ds(ds=ds, path=f) else: - logger.info(f"Moving {source} to {dest}.") + msg = f"Moving {source} to {dest}." + logger.info(msg) sh.move(source, dest) if Path(dest).suffix in [".zarr", ".nc"]: ds = xr.open_dataset(dest) pcat.update_from_ds(ds=ds, path=dest) else: - logger.info(f"You are trying to move {source}, but it does not exist.") + msg = f"You are trying to move {source}, but it does not exist." + logger.info(msg) else: raise ValueError("`moving` should be a list of lists.") @@ -481,9 +484,10 @@ def move_and_delete( if isinstance(deleting, list): for dir_to_delete in deleting: if Path(dir_to_delete).exists() and Path(dir_to_delete).is_dir(): - logger.info(f"Deleting content inside {dir_to_delete}.") + msg = f"Deleting content inside {dir_to_delete}." + logger.info(msg) sh.rmtree(dir_to_delete) - os.mkdir(dir_to_delete) + Path(dir_to_delete).mkdir() elif deleting is None: pass else: diff --git a/xscen/spatial.py b/src/xscen/spatial.py similarity index 91% rename from xscen/spatial.py rename to src/xscen/spatial.py index 8ebe2239..a881111f 100644 --- a/xscen/spatial.py +++ b/src/xscen/spatial.py @@ -6,7 +6,6 @@ import warnings from collections.abc import Sequence from pathlib import Path -from typing import Optional, Union import clisops.core.subset import dask @@ -15,6 +14,7 @@ import sparse as sp import xarray as xr import xclim as xc +from pyproj.crs import CRS from .config import parse_config @@ -139,7 +139,7 @@ def subset( ds: xr.Dataset, method: str, *, - name: Optional[str] = None, + name: str | None = None, tile_buffer: float = 0, **kwargs, ) -> xr.Dataset: @@ -204,10 +204,10 @@ def subset( def _subset_gridpoint( ds: xr.Dataset, - lon: Union[float, Sequence[float], xr.DataArray], - lat: Union[float, Sequence[float], xr.DataArray], + lon: float | Sequence[float] | xr.DataArray, + lat: float | Sequence[float] | xr.DataArray, *, - name: Optional[str] = None, + name: str | None = None, **kwargs, ) -> xr.Dataset: r"""Subset the data to a gridpoint. @@ -253,10 +253,10 @@ def _subset_gridpoint( def _subset_bbox( ds: xr.Dataset, - lon_bnds: Union[tuple[float, float], list[float]], - lat_bnds: Union[tuple[float, float], list[float]], + lon_bnds: tuple[float, float] | list[float], + lat_bnds: tuple[float, float] | list[float], *, - name: Optional[str] = None, + name: str | None = None, tile_buffer: float = 0, **kwargs, ) -> xr.Dataset: @@ -316,9 +316,9 @@ def _subset_bbox( def _subset_shape( ds: xr.Dataset, - shape: Union[str, Path, gpd.GeoDataFrame], + shape: str | Path | gpd.GeoDataFrame, *, - name: Optional[str] = None, + name: str | None = None, tile_buffer: float = 0, **kwargs, ) -> xr.Dataset: @@ -357,20 +357,41 @@ def _subset_shape( "Both tile_buffer and clisops' buffer were requested. Use only one." 
) lon_res, lat_res = _estimate_grid_resolution(ds) + + # The buffer argument needs to be in the same units as the shapefile, so it's simpler to always project the shapefile to WGS84. + if isinstance(shape, str | Path): + shape = gpd.read_file(shape) + + try: + shape_crs = shape.crs + except AttributeError: + shape_crs = None + if shape_crs is None: + warnings.warn( + "Shapefile does not have a CRS. Compatibility with the dataset is not guaranteed.", + category=UserWarning, + ) + elif shape_crs != CRS(4326): # WGS84 + warnings.warn( + "Shapefile is not in EPSG:4326. Reprojecting to this CRS.", + UserWarning, + ) + shape = shape.to_crs(4326) + kwargs["buffer"] = np.max([lon_res, lat_res]) * tile_buffer ds_subset = clisops.core.subset_shape(ds, shape=shape, **kwargs) new_history = ( f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " f"shape spatial subsetting with {'buffer=' + str(tile_buffer) if tile_buffer > 0 else 'no buffer'}" - f", shape={Path(shape).name if isinstance(shape, (str, Path)) else 'gpd.GeoDataFrame'}" + f", shape={Path(shape).name if isinstance(shape, str | Path) else 'gpd.GeoDataFrame'}" f" - clisops v{clisops.__version__}" ) return update_history_and_name(ds_subset, new_history, name) -def _subset_sel(ds: xr.Dataset, *, name: Optional[str] = None, **kwargs) -> xr.Dataset: +def _subset_sel(ds: xr.Dataset, *, name: str | None = None, **kwargs) -> xr.Dataset: r"""Subset the data using the .sel() method. Parameters diff --git a/xscen/testing.py b/src/xscen/testing.py similarity index 98% rename from xscen/testing.py rename to src/xscen/testing.py index 866b0252..65807422 100644 --- a/xscen/testing.py +++ b/src/xscen/testing.py @@ -1,7 +1,5 @@ """Testing utilities for xscen.""" -from typing import Optional, Union - import cartopy.crs as ccrs import numpy as np import pandas as pd @@ -22,9 +20,9 @@ def datablock_3d( y_step: float = 0.1, start: str = "7/1/2000", freq: str = "D", - units: Optional[str] = None, + units: str | None = None, as_dataset: bool = False, -) -> Union[xr.DataArray, xr.Dataset]: +) -> xr.DataArray | xr.Dataset: """Create a generic timeseries object based on pre-defined dictionaries of existing variables. Parameters diff --git a/xscen/utils.py b/src/xscen/utils.py similarity index 66% rename from xscen/utils.py rename to src/xscen/utils.py index c354e9ec..0dcc8871 100644 --- a/xscen/utils.py +++ b/src/xscen/utils.py @@ -6,14 +6,16 @@ import logging import os import re +import warnings from collections import defaultdict from collections.abc import Sequence +from copy import deepcopy from datetime import datetime from io import StringIO from itertools import chain from pathlib import Path from types import ModuleType -from typing import Optional, TextIO, Union +from typing import TextIO import cftime import flox.xarray @@ -22,7 +24,7 @@ import xarray as xr from xarray.coding import cftime_offsets as cfoff from xclim.core import units -from xclim.core.calendar import convert_calendar, get_calendar, parse_offset +from xclim.core.calendar import parse_offset from xclim.core.options import METADATA_LOCALES from xclim.core.options import OPTIONS as XC_OPTIONS from xclim.core.utils import uses_dask @@ -57,8 +59,9 @@ TRANSLATOR = defaultdict(lambda: lambda s: s) """Dictionary of translating objects. -Each key is a two letter locale code and values are functions that return the translated message as compiled in the gettext catalogs. -If a language is not defined or a message not translated, the function will return the raw message. 
+Each key is a two letter locale code and values are functions that return the translated message +as compiled in the gettext catalogs. If a language is not defined or a message not translated, +the function will return the raw message. """ try: @@ -69,22 +72,23 @@ ).gettext except FileNotFoundError as err: raise ImportError( - "Your xscen installation doesn't have compiled translations. Run `make translate` from the source directory to fix." + "Your xscen installation doesn't have compiled translations. " + "Run `make translate` from the source directory to fix." ) from err def update_attr( - ds: Union[xr.Dataset, xr.DataArray], + ds: xr.Dataset | xr.DataArray, attr: str, new: str, - others: Optional[Sequence[Union[xr.Dataset, xr.DataArray]]] = None, + others: Sequence[xr.Dataset | xr.DataArray] | None = None, **fmt, -) -> Union[xr.Dataset, xr.DataArray]: - """Format an attribute referencing itself in a translatable way. +) -> xr.Dataset | xr.DataArray: + r"""Format an attribute referencing itself in a translatable way. Parameters ---------- - ds: Dataset or DataArray + ds : Dataset or DataArray The input object with the attribute to update. attr : str Attribute name. @@ -96,7 +100,7 @@ def update_attr( These can be referenced as "{attrXX}" in `new`, where XX is the based-1 index of the other source in `others`. If they don't have the `attr` attribute, an empty string is sent to the string formatting. See notes. - fmt: + \*\*fmt Other formatting data. Returns @@ -133,24 +137,27 @@ def update_attr( others = others or [] # .strip(' .') removes trailing and leading whitespaces and dots if attr in ds.attrs: - others = { + + others_attrs = { f"attr{i}": dso.attrs.get(attr, "").strip(" .") for i, dso in enumerate(others, 1) } - ds.attrs[attr] = new.format(attr=ds.attrs[attr].strip(" ."), **others, **fmt) + ds.attrs[attr] = new.format( + attr=ds.attrs[attr].strip(" ."), **others_attrs, **fmt + ) # All existing locales for key in fnmatch.filter(ds.attrs.keys(), f"{attr}_??"): loc = key[-2:] - others = { + others_attrs = { f"attr{i}": dso.attrs.get(key, dso.attrs.get(attr, "")).strip(" .") for i, dso in enumerate(others, 1) } ds.attrs[key] = TRANSLATOR[loc](new).format( - attr=ds.attrs[key].strip(" ."), **others, **fmt + attr=ds.attrs[key].strip(" ."), **others_attrs, **fmt ) -def add_attr(ds: Union[xr.Dataset, xr.DataArray], attr: str, new: str, **fmt): +def add_attr(ds: xr.Dataset | xr.DataArray, attr: str, new: str, **fmt): """Add a formatted translatable attribute to a dataset.""" ds.attrs[attr] = new.format(**fmt) for loc in XC_OPTIONS[METADATA_LOCALES]: @@ -158,13 +165,13 @@ def add_attr(ds: Union[xr.Dataset, xr.DataArray], attr: str, new: str, **fmt): def date_parser( # noqa: C901 - date: Union[str, cftime.datetime, pd.Timestamp, datetime, pd.Period], + date: str | cftime.datetime | pd.Timestamp | datetime | pd.Period, *, - end_of_period: Union[bool, str] = False, + end_of_period: bool | str = False, out_dtype: str = "datetime", strtime_format: str = "%Y-%m-%d", freq: str = "H", -) -> Union[str, pd.Period, pd.Timestamp]: +) -> str | pd.Period | pd.Timestamp: """Return a datetime from a string. Parameters @@ -173,7 +180,8 @@ def date_parser( # noqa: C901 Date to be converted end_of_period : bool or str If 'YE' or 'ME', the returned date will be the end of the year or month that contains the received date. - If True, the period is inferred from the date's precision, but `date` must be a string, otherwise nothing is done. 
+ If True, the period is inferred from the date's precision, but `date` must be a string, + otherwise nothing is done. out_dtype : str Choices are 'datetime', 'period' or 'str' strtime_format : str @@ -264,24 +272,64 @@ def _parse_date(date, fmts): def minimum_calendar(*calendars) -> str: """Return the minimum calendar from a list. - Uses the hierarchy: 360_day < noleap < standard < all_leap, - and returns one of those names. + Uses the hierarchy: 360_day < noleap < standard < all_leap, and returns one of those names. """ + # Unwrap any lists or tuples given in the input, but without destroying strings. + calendars = [[cal] if isinstance(cal, str) else cal for cal in calendars] + calendars = list(chain(*calendars)) + + # Raise an error if the calendars are not recognized + unknowns = set(calendars).difference( + [ + "360_day", + "365_day", + "noleap", + "standard", + "default", + "all_leap", + "366_day", + "gregorian", + "proleptic_gregorian", + ] + ) + if unknowns: + warnings.warn( + f"These calendars are not recognized: {unknowns}. Results may be incorrect.", + ) + if "360_day" in calendars: - return "360_day" + out = "360_day" + elif "noleap" in calendars or "365_day" in calendars: + out = "noleap" + elif all(cal in ["all_leap", "366_day"] for cal in calendars): + out = "all_leap" + else: + out = "standard" - if "noleap" in calendars or "365_day" in calendars: - return "noleap" + return out - if all(cal in ["all_leap", "366_day"] for cal in calendars): - return "all_leap" - return "standard" +def translate_time_chunk(chunks: dict, calendar: str, timesize: int) -> dict: + """Translate chunk specification for time into a number. + Parameters + ---------- + chunks : dict + Dictionary specifying the chunk sizes for each dimension. The time dimension can be specified as: + -1 : translates to `timesize` + 'Nyear' : translates to N times the number of days in a year of the given calendar. + calendar : str + The calendar type (e.g., 'noleap', '360_day', 'all_leap'). + timesize : int + The size of the time dimension. -def translate_time_chunk(chunks: dict, calendar: str, timesize) -> dict: - """Translate chunk specification for time into a number. + Returns + ------- + dict + The updated chunks dictionary with the time dimension translated to a number. + Notes + ----- -1 translates to `timesize` 'Nyear' translates to N times the number of days in a year of calendar `calendar`. """ @@ -291,10 +339,19 @@ def translate_time_chunk(chunks: dict, calendar: str, timesize) -> dict: elif k == "time" and v is not None: if isinstance(v, str) and v.endswith("year"): n = int(chunks["time"].split("year")[0]) - Nt = n * {"noleap": 365, "360_day": 360, "all_leap": 366}.get( - calendar, 365.25 - ) - chunks[k] = int(Nt) + nt = n * { + "noleap": 365, + "365_day": 365, + "360_day": 360, + "all_leap": 366, + "366_day": 366, + }.get(calendar, 365.25) + if nt != int(nt): + warnings.warn( + f"The number of days in {chunks['time']} for calendar {calendar} is not an integer. " + f"Chunks will not align perfectly with year ends." + ) + chunks[k] = int(nt) elif v == -1: chunks[k] = timesize return chunks @@ -303,10 +360,10 @@ def translate_time_chunk(chunks: dict, calendar: str, timesize) -> dict: @parse_config def stack_drop_nans( ds: xr.Dataset, - mask: xr.DataArray, + mask: xr.DataArray | list[str], *, new_dim: str = "loc", - to_file: Optional[str] = None, + to_file: str | None = None, ) -> xr.Dataset: """Stack dimensions into a single axis and drops indexes where the mask is false. 
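# Usage sketch for the reworked calendar helpers above (minimum_calendar and
# translate_time_chunk), with literal inputs so it runs as-is.
from xscen.utils import minimum_calendar, translate_time_chunk

minimum_calendar("standard", "noleap")     # -> "noleap"
minimum_calendar(["all_leap", "360_day"])  # lists are now unwrapped -> "360_day"

# "4year" expands to 4 * 365 steps for a noleap calendar; -1 would expand to `timesize`.
translate_time_chunk({"time": "4year", "lat": 50}, calendar="noleap", timesize=10950)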
@@ -314,9 +371,10 @@ def stack_drop_nans( ---------- ds : xr.Dataset A dataset with the same coords as `mask`. - mask : xr.DataArray - A boolean DataArray with True on the points to keep. - Mask will be loaded within this function. + mask : xr.DataArray or list of str + A boolean DataArray with True on the points to keep. The mask will be loaded within this function, but not the dataset. + Alternatively, a list of dimension names to stack. In this case, a mask will be created by loading all data and checking for NaNs. + The latter is not recommended for large datasets. new_dim : str The name of the new stacked dim. to_file : str, optional @@ -343,23 +401,37 @@ def stack_drop_nans( -------- unstack_fill_nan : The inverse operation. """ - original_shape = "x".join(map(str, mask.shape)) - - mask_1d = mask.stack({new_dim: mask.dims}) - out = ds.stack({new_dim: mask.dims}).where(mask_1d, drop=True).reset_index(new_dim) + if isinstance(mask, xr.DataArray): + mask_1d = mask.stack({new_dim: mask.dims}) + out = ds.stack({new_dim: mask.dims}).where(mask_1d, drop=True) + else: + mask = ds.coords.to_dataset().drop_vars( + [v for v in ds.coords if not any(d in mask for d in ds[v].dims)] + ) + mask = xr.DataArray( + np.ones(list(mask.sizes.values())), dims=mask.dims, coords=mask.coords + ) # Make it a DataArray to fit the rest of the function + out = ds.stack({new_dim: mask.dims}).dropna(new_dim, how="all") + out = out.reset_index(new_dim) for dim in mask.dims: out[dim].attrs.update(ds[dim].attrs) + original_shape = "x".join(map(str, mask.shape)) + if to_file is not None: - # set default path to store the information necessary to unstack - # the name includes the domain and the original shape to uniquely identify the dataset + # Set default path to store the information necessary to unstack + # The name includes the domain and the original shape to uniquely identify the dataset domain = ds.attrs.get("cat:domain", "unknown") to_file = to_file.format(domain=domain, shape=original_shape) if not Path(to_file).parent.exists(): - os.makedirs(Path(to_file).parent, exist_ok=True) + Path(to_file).parent.mkdir(exist_ok=True) + # Add all coordinates that might have been affected by the stack + mask = mask.assign_coords( + {c: ds[c] for c in ds.coords if any(d in mask.dims for d in ds[c].dims)} + ) mask.coords.to_dataset().to_netcdf(to_file) - # carry information about original shape to be able to unstack properly + # Carry information about original shape to be able to unstack properly for dim in mask.dims: out[dim].attrs["original_shape"] = original_shape @@ -379,49 +451,58 @@ def unstack_fill_nan( ds: xr.Dataset, *, dim: str = "loc", - coords: Optional[ - Union[str, os.PathLike, Sequence[Union[str, os.PathLike]], dict] - ] = None, + coords: None | ( + str | os.PathLike | Sequence[str | os.PathLike] | dict[str, xr.DataArray] + ) = None, ): """Unstack a Dataset that was stacked by :py:func:`stack_drop_nans`. Parameters ---------- ds : xr.Dataset - A dataset with some dims stacked by `stack_drop_nans`. + A dataset with some dimensions stacked by `stack_drop_nans`. dim : str The dimension to unstack, same as `new_dim` in `stack_drop_nans`. - coords : Sequence of strings, Mapping of str to array, str, optional - If a sequence : if the dataset has coords along `dim` that are not original - dimensions, those original dimensions must be listed here. - If a dict : a mapping from the name to the array of the coords to unstack - If a str : a filename to a dataset containing only those coords (as coords). 
- If given a string with {shape} and {domain}, the formatting will fill them with - the original shape of the dataset (that should have been store in the - attributes of the stacked dimensions) by `stack_drop_nans` and the global attributes 'cat:domain'. - It is recommended to fill this argument in the config. It will be parsed automatically. - E.g.: - - utils: - stack_drop_nans: - to_file: /some_path/coords/coords_{domain}_{shape}.nc - unstack_fill_nan: - coords: /some_path/coords/coords_{domain}_{shape}.nc - - If None (default), all coords that have `dim` a single dimension are used as the - new dimensions/coords in the unstacked output. - Coordinates will be loaded within this function. + coords : string or os.PathLike or Sequence or dict, optional + Additional information used to reconstruct coordinates that might have been lost in the stacking (e.g., if a lat/lon grid was all NaNs). + If a string or os.PathLike : Path to a dataset containing only those coordinates, such as the output of `to_file` in `stack_drop_nans`. + This is the recommended option. + If a dictionary : A mapping from the name of the coordinate that was stacked to a DataArray. Better alternative if no file is available. + If a sequence : The names of the original dimensions that were stacked. Worst option. + If None (default), same as a sequence, but all coordinates that have `dim` as a single dimension are used as the new dimensions. + See Notes for more information. Returns ------- xr.Dataset Same as `ds`, but `dim` has been unstacked to coordinates in `coords`. Missing elements are filled according to the defaults of `fill_value` of :py:meth:`xarray.Dataset.unstack`. + + Notes + ----- + Some information might have been completely lost in the stacking process, for example, if a longitude is NaN across all latitudes. + It is impossible to recover that information when using `coords` as a list, which is why it is recommended to use a file or a dictionary instead. + + If a dictionary is used, the keys must be the names of the coordinates that were stacked and the values must be the DataArrays. + This method can recover both dimensions and additional coordinates that were not dimensions in the original dataset, but were stacked. + + If the original stacking was done with `stack_drop_nans` and the `to_file` argument was used, the `coords` argument should be a string with + the path to the file. Additionally, the file name can contain the formatting fields {shape} and {domain}, which will be automatically filled + with the original shape of the dataset and the global attribute 'cat:domain'. If using that dynamic path, it is recommended to fill the + argument in the xscen config. + E.g.: + + utils: + stack_drop_nans: + to_file: /some_path/coords/coords_{domain}_{shape}.nc + unstack_fill_nan: + coords: /some_path/coords/coords_{domain}_{shape}.nc """ if coords is None: logger.info("Dataset unstacked using no coords argument.") + coords = [d for d in ds.coords if ds[d].dims == (dim,)] - if isinstance(coords, (str, os.PathLike)): + if isinstance(coords, str | os.PathLike): # find original shape in the attrs of one of the dimension original_shape = "unknown" for c in ds.coords: @@ -429,7 +510,8 @@ def unstack_fill_nan( original_shape = ds[c].attrs["original_shape"] domain = ds.attrs.get("cat:domain", "unknown") coords = coords.format(domain=domain, shape=original_shape) - logger.info(f"Dataset unstacked using {coords}.") + msg = f"Dataset unstacked using {coords}." 
+ logger.info(msg) coords = xr.open_dataset(coords) # separate coords that are dims or not coords_and_dims = { @@ -446,37 +528,58 @@ def unstack_fill_nan( if crd.dims == (dim,) and name in coords_and_dims ] ) - out = ( - ds.drop_vars(dims) - .assign_coords({dim: pd.MultiIndex.from_arrays(crds, names=dims)}) - .unstack(dim) - ) + + mindex_obj = pd.MultiIndex.from_arrays(crds, names=dims) + mindex_coords = xr.Coordinates.from_pandas_multiindex(mindex_obj, dim) + + out = ds.drop_vars(dims).assign_coords(mindex_coords).unstack(dim) # only reindex with the dims out = out.reindex(**coords_and_dims) - # add back the coords that arent dims + # add back the coords that aren't dims for c in coords_not_dims: out[c] = coords[c] else: - if isinstance(coords, (list, tuple)): - dims, crds = zip(*[(name, ds[name].load().values) for name in coords]) - else: - dims, crds = zip( - *[ - (name, crd.load().values) - for name, crd in ds.coords.items() - if crd.dims == (dim,) - ] - ) + coord_not_dim = {} + # Special case where the dictionary contains both dimensions and other coordinates + if isinstance(coords, dict): + coord_not_dim = { + k: v + for k, v in coords.items() + if len(set(v.dims).intersection(list(coords))) != 1 + } + coords = deepcopy(coords) + coords = { + k: v + for k, v in coords.items() + if k in set(coords).difference(coord_not_dim) + } - out = ( - ds.drop_vars(dims) - .assign_coords({dim: pd.MultiIndex.from_arrays(crds, names=dims)}) - .unstack(dim) + dims, crds = zip( + *[ + (name, crd.load().values) + for name, crd in ds.coords.items() + if (crd.dims == (dim,) and name in set(coords)) + ] ) - if not isinstance(coords, (list, tuple)) and coords is not None: - out = out.reindex(**coords.coords) + # Reconstruct the dimensions + mindex_obj = pd.MultiIndex.from_arrays(crds, names=dims) + mindex_coords = xr.Coordinates.from_pandas_multiindex(mindex_obj, dim) + + out = ds.drop_vars(dims).assign_coords(mindex_coords).unstack(dim) + + if isinstance(coords, dict): + # Reindex with the coords that were dimensions + out = out.reindex(**coords) + # Add back the coordinates that aren't dimensions + for c in coord_not_dim: + out[c] = coord_not_dim[c] + + # Reorder the dimensions to match the CF conventions + order = [out.cf.axes.get(d, [""])[0] for d in ["T", "Z", "Y", "X"]] + order = [d for d in order if d] + [d for d in out.dims if d not in order] + out = out.transpose(*order) for dim in dims: out[dim].attrs.update(ds[dim].attrs) @@ -485,8 +588,7 @@ def unstack_fill_nan( def natural_sort(_list: list[str]): - """ - For strings of numbers. alternative to sorted() that detects a more natural order. + """For strings of numbers. alternative to sorted() that detects a more natural order. e.g. [r3i1p1, r1i1p1, r10i1p1] is sorted as [r1i1p1, r3i1p1, r10i1p1] instead of [r10i1p1, r1i1p1, r3i1p1] """ @@ -498,26 +600,25 @@ def natural_sort(_list: list[str]): def get_cat_attrs( - ds: Union[xr.Dataset, xr.DataArray, dict], prefix: str = "cat:", var_as_str=False + ds: xr.Dataset | xr.DataArray | dict, prefix: str = "cat:", var_as_str=False ) -> dict: """Return the catalog-specific attributes from a dataset or dictionary. Parameters ---------- - ds: xr.Dataset, dict + ds : xr.Dataset, dict Dataset to be parsed. If a dictionary, it is assumed to be the attributes of the dataset (ds.attrs). - prefix: str + prefix : str Prefix automatically generated by intake-esm. With xscen, this should be 'cat:' - var_as_str: bool + var_as_str : bool If True, 'variable' will be returned as a string if there is only one. 
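A minimal round trip of the stack/unstack pair documented above, on a toy grid; real workflows should go through `to_file`/`coords` as the docstrings recommend, but this shows the default behaviour:

```python
import numpy as np
import xarray as xr
import xscen as xs

ds = xr.Dataset(
    {"tas": (("lat", "lon"), np.array([[1.0, np.nan], [3.0, 4.0]]))},
    coords={"lat": [45.0, 46.0], "lon": [-75.0, -74.0]},
)
mask = ds.tas.notnull()  # keep only the valid points

stacked = xs.utils.stack_drop_nans(ds, mask)    # 'lat'/'lon' collapsed into 'loc', the NaN point dropped
unstacked = xs.utils.unstack_fill_nan(stacked)  # back on the lat/lon grid, the dropped point refilled with NaN
```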
Returns ------- dict Compilation of all attributes in a dictionary. - """ - if isinstance(ds, (xr.Dataset, xr.DataArray)): + if isinstance(ds, xr.Dataset | xr.DataArray): attrs = ds.attrs else: attrs = ds @@ -539,8 +640,9 @@ def get_cat_attrs( @parse_config def maybe_unstack( ds: xr.Dataset, - coords: Optional[str] = None, - rechunk: Optional[dict] = None, + dim: str | None = None, + coords: str | None = None, + rechunk: dict | None = None, stack_drop_nans: bool = False, ) -> xr.Dataset: """If stack_drop_nans is True, unstack and rechunk. @@ -549,6 +651,8 @@ def maybe_unstack( ---------- ds : xr.Dataset Dataset to unstack. + dim : str, optional + Dimension to unstack. coords : str, optional Path to a dataset containing the coords to unstack (and only those). rechunk : dict, optional @@ -563,7 +667,7 @@ def maybe_unstack( Unstacked dataset. """ if stack_drop_nans: - ds = unstack_fill_nan(ds, coords=coords) + ds = unstack_fill_nan(ds, dim=dim, coords=coords) if rechunk is not None: ds = ds.chunk(rechunk) return ds @@ -589,31 +693,31 @@ def maybe_unstack( xs.utils.CV.frequency_to_timedelta.dict - .. literalinclude:: ../xscen/CVs/frequency_to_timedelta.json + .. literalinclude:: ../src/xscen/CVs/frequency_to_timedelta.json :language: json :caption: frequency_to_timedelta - .. literalinclude:: ../xscen/CVs/frequency_to_xrfreq.json + .. literalinclude:: ../src/xscen/CVs/frequency_to_xrfreq.json :language: json :caption: frequency_to_xrfreq - .. literalinclude:: ../xscen/CVs/infer_resolution.json + .. literalinclude:: ../src/xscen/CVs/infer_resolution.json :language: json :caption: infer_resolution - .. literalinclude:: ../xscen/CVs/resampling_methods.json + .. literalinclude:: ../src/xscen/CVs/resampling_methods.json :language: json :caption: resampling_methods - .. literalinclude:: ../xscen/CVs/variable_names.json + .. literalinclude:: ../src/xscen/CVs/variable_names.json :language: json :caption: variable_names - .. literalinclude:: ../xscen/CVs/xrfreq_to_frequency.json + .. literalinclude:: ../src/xscen/CVs/xrfreq_to_frequency.json :language: json :caption: xrfreq_to_frequency - .. literalinclude:: ../xscen/CVs/xrfreq_to_timedelta.json + .. literalinclude:: ../src/xscen/CVs/xrfreq_to_timedelta.json :language: json :caption: xrfreq_to_timedelta @@ -623,7 +727,7 @@ def maybe_unstack( ) -def __read_CVs(cvfile): +def __read_CVs(cvfile): # noqa: N802 with cvfile.open("r") as f: cv = json.load(f) is_regex = cv.pop("is_regex", False) @@ -633,14 +737,14 @@ def __read_CVs(cvfile): Parameters ---------- - key: str - The value to translate.{regex} + key : str + The value to translate.{regex} default : 'pass', 'error' or Any - If the key is not found in the mapping, default controls the behaviour. + If the key is not found in the mapping, default controls the behaviour. - - "error", a KeyError is raised (default). - - "pass", the key is returned. - - another value, that value is returned. + - "error", a KeyError is raised (default). + - "pass", the key is returned. + - another value, that value is returned. 
""" def cvfunc(key, default="error"): @@ -671,10 +775,11 @@ def cvfunc(key, default="error"): return cvfunc -for cvfile in (Path(__file__).parent / "CVs").glob("*.json"): +for cvfile in Path(__file__).parent.joinpath("CVs").glob("*.json"): try: CV.__dict__[cvfile.stem] = __read_CVs(cvfile) - except Exception as err: + # FIXME: This is a catch-all, but we should be more specific + except Exception as err: # noqa: BLE001 raise ValueError(f"While reading {cvfile} got {err}") @@ -715,7 +820,7 @@ def change_units(ds: xr.Dataset, variables_and_units: dict) -> xr.Dataset: # ds is a rate ds[v] = units.rate2amount(ds[v], out_units=variables_and_units[v]) else: - raise NotImplementedError( + raise ValueError( f"No known transformation between {ds[v].units} and {variables_and_units[v]} (temporal dimensionality mismatch)." ) elif (v in ds) and (ds[v].units != variables_and_units[v]): @@ -728,27 +833,26 @@ def change_units(ds: xr.Dataset, variables_and_units: dict) -> xr.Dataset: def clean_up( # noqa: C901 ds: xr.Dataset, *, - variables_and_units: Optional[dict] = None, - convert_calendar_kwargs: Optional[dict] = None, - missing_by_var: Optional[dict] = None, - maybe_unstack_dict: Optional[dict] = None, - round_var: Optional[dict] = None, - common_attrs_only: Optional[ - Union[dict, list[Union[xr.Dataset, str, os.PathLike]]] - ] = None, - common_attrs_open_kwargs: Optional[dict] = None, - attrs_to_remove: Optional[dict] = None, - remove_all_attrs_except: Optional[dict] = None, - add_attrs: Optional[dict] = None, - change_attr_prefix: Optional[str] = None, - to_level: Optional[str] = None, + variables_and_units: dict | None = None, + convert_calendar_kwargs: dict | None = None, + missing_by_var: dict | None = None, + maybe_unstack_dict: dict | None = None, + round_var: dict | None = None, + common_attrs_only: None | (dict | list[xr.Dataset | str | os.PathLike]) = None, + common_attrs_open_kwargs: dict | None = None, + attrs_to_remove: dict | None = None, + remove_all_attrs_except: dict | None = None, + add_attrs: dict | None = None, + change_attr_prefix: str | dict | None = None, + to_level: str | None = None, ) -> xr.Dataset: """Clean up of the dataset. It can: - - convert to the right units using xscen.finalize.change_units + - convert to the right units using xscen.utils.change_units - convert the calendar and interpolate over missing dates - - call the xscen.common.maybe_unstack function + - call the xscen.utils.maybe_unstack function + - round variables - remove a list of attributes - remove everything but a list of attributes - add attributes @@ -761,48 +865,44 @@ def clean_up( # noqa: C901 ds : xr.Dataset Input dataset to clean up variables_and_units : dict, optional - Dictionary of variable to convert. eg. {'tasmax': 'degC', 'pr': 'mm d-1'} + Dictionary of variable to convert. e.g. {'tasmax': 'degC', 'pr': 'mm d-1'} convert_calendar_kwargs : dict, optional - Dictionary of arguments to feed to xclim.core.calendar.convert_calendar. This will be the same for all variables. + Dictionary of arguments to feed to xarray.Dataset.convert_calendar. This will be the same for all variables. If missing_by_vars is given, it will override the 'missing' argument given here. - Eg. {target': default, 'align_on': 'random'} + Eg. 
{'calendar': 'standard', 'align_on': 'random'} missing_by_var : dict, optional Dictionary where the keys are the variables and the values are the argument to feed the `missing` - parameters of the xclim.core.calendar.convert_calendar for the given variable with the `convert_calendar_kwargs`. - When the value of an entry is 'interpolate', the missing values will be filled with NaNs, then linearly interpolated over time. + parameters of xarray.Dataset.convert_calendar for the given variable with the + `convert_calendar_kwargs`. When the value of an entry is 'interpolate', the missing values will be filled + with NaNs, then linearly interpolated over time. maybe_unstack_dict : dict, optional Dictionary to pass to xscen.common.maybe_unstack function. The format should be: {'coords': path_to_coord_file, 'rechunk': {'time': -1 }, 'stack_drop_nans': True}. round_var : dict, optional - Dictionary where the keys are the variables of the dataset and the values are the number of decimal places to round to + Dictionary where the keys are the variables of the dataset and the values are the number of + decimal places to round to. common_attrs_only : dict, list of datasets, or list of paths, optional - Dictionnary of datasets or list of datasets, or path to NetCDF or Zarr files. + Dictionary of datasets or list of datasets, or path to NetCDF or Zarr files. Keeps only the global attributes that are the same for all datasets and generates a new id. common_attrs_open_kwargs : dict, optional Dictionary of arguments for xarray.open_dataset(). Used with common_attrs_only if given paths. attrs_to_remove : dict, optional Dictionary where the keys are the variables and the values are a list of the attrs that should be removed. + The match is done using re.fullmatch, so the strings can be regex patterns but don't need to contain '^' or '$'. For global attrs, use the key 'global'. - The element of the list can be exact matches for the attributes name - or use the same substring matching rules as intake_esm: - - ending with a '*' means checks if the substring is contained in the string - - starting with a '^' means check if the string starts with the substring. - eg. {'global': ['unnecessary note', 'cell*'], 'tasmax': 'old_name'} + e.g. {'global': ['unnecessary note', 'cell.*'], 'tasmax': 'old_name'} remove_all_attrs_except : dict, optional - Dictionary where the keys are the variables and the values are a list of the attrs that should NOT be removed, - all other attributes will be deleted. If None (default), nothing will be deleted. - For global attrs, use the key 'global'. - The element of the list can be exact matches for the attributes name - or use the same substring matching rules as intake_esm: - - ending with a '*' means checks if the substring is contained in the string - - starting with a '^' means check if the string starts with the substring. - eg. {'global': ['necessary note', '^cat:'], 'tasmax': 'new_name'} + Dictionary where the keys are the variables and the values are a list of the attrs that should NOT be removed. + The match is done using re.fullmatch, so the strings can be regex patterns but don't need to contain '^' or '$'. + All other attributes will be deleted. For global attrs, use the key 'global'. + e.g. {'global': ['necessary note', '^cat:'], 'tasmax': 'new_name'} add_attrs : dict, optional Dictionary where the keys are the variables and the values are a another dictionary of attributes. For global attrs, use the key 'global'. - eg. 
{'global': {'title': 'amazing new dataset'}, 'tasmax': {'note': 'important info about tasmax'}} - change_attr_prefix : str, optional - Replace "cat:" in the catalog global attrs by this new string + e.g. {'global': {'title': 'amazing new dataset'}, 'tasmax': {'note': 'important info about tasmax'}} + change_attr_prefix : str or dict, optional + If a string, replace "cat:" in the catalog global attributes by this new string. + If a dictionary, the key is the old prefix and the value is the new prefix. to_level : str, optional The processing level to assign to the output. @@ -815,15 +915,17 @@ def clean_up( # noqa: C901 -------- xclim.core.calendar.convert_calendar """ + ds = ds.copy() + if variables_and_units: - logger.info(f"Converting units: {variables_and_units}") + msg = f"Converting units: {variables_and_units}" + logger.info(msg) ds = change_units(ds=ds, variables_and_units=variables_and_units) # convert calendar if convert_calendar_kwargs: - ds_copy = ds.copy() # create mask of grid point that should always be nan - ocean = ds_copy.isnull().all("time") + ocean = ds.isnull().all("time") # if missing_by_var exist make sure missing data are added to time axis if missing_by_var: @@ -834,21 +936,31 @@ def clean_up( # noqa: C901 convert_calendar_kwargs["missing"] = -9999 # make default `align_on`='`random` when the initial calendar is 360day - if get_calendar(ds) == "360_day" and "align_on" not in convert_calendar_kwargs: + if ( + any( + cal == "360_day" + for cal in [ds.time.dt.calendar, convert_calendar_kwargs["calendar"]] + ) + and "align_on" not in convert_calendar_kwargs + ): convert_calendar_kwargs["align_on"] = "random" - logger.info(f"Converting calendar with {convert_calendar_kwargs} ") - ds = convert_calendar(ds, **convert_calendar_kwargs).where(~ocean) + msg = f"Converting calendar with {convert_calendar_kwargs}." + logger.info(msg) + ds = ds.convert_calendar(**convert_calendar_kwargs).where(~ocean) # convert each variable individually if missing_by_var: - # remove 'missing' argument to be replace by `missing_by_var` + # remove 'missing' argument to be replaced by `missing_by_var` del convert_calendar_kwargs["missing"] for var, missing in missing_by_var.items(): - logging.info(f"Filling missing {var} with {missing}") + msg = f"Filling missing {var} with {missing}" + logging.info(msg) if missing == "interpolate": ds_with_nan = ds[var].where(ds[var] != -9999) - converted_var = ds_with_nan.interpolate_na("time", method="linear") + converted_var = ds_with_nan.chunk({"time": -1}).interpolate_na( + "time", method="linear" + ) else: var_attrs = ds[var].attrs converted_var = xr.where(ds[var] == -9999, missing, ds[var]) @@ -862,14 +974,16 @@ def clean_up( # noqa: C901 if round_var: for var, n in round_var.items(): ds[var] = ds[var].round(n) - - def _search(a, b): - if a[-1] == "*": # check if a is contained in b - return a[:-1] in b - elif a[0] == "^": - return b.startswith(a[1:]) - else: - return a == b + new_history = ( + f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] " + f"Rounded '{var}' to {n} decimals." 
+ ) + history = ( + f"{new_history}\n{ds[var].attrs['history']}" + if "history" in ds[var].attrs + else new_history + ) + ds[var].attrs["history"] = history if common_attrs_only: from .catalog import generate_id @@ -879,7 +993,7 @@ def _search(a, b): common_attrs_only = list(common_attrs_only.values()) for i in range(len(common_attrs_only)): - if isinstance(common_attrs_only[i], (str, os.PathLike)): + if isinstance(common_attrs_only[i], str | os.PathLike): dataset = xr.open_dataset( common_attrs_only[i], **common_attrs_open_kwargs ) @@ -898,9 +1012,8 @@ def _search(a, b): try: ds.attrs["cat:id"] = generate_id(ds).iloc[0] except IndexError as err: - logger.warning(f"Unable to generate a new id for the dataset. Got {err}.") - if "cat:id" in ds.attrs: - del ds.attrs["cat:id"] + msg = f"Unable to generate a new id for the dataset. Got {err}." + logger.warning(msg) if to_level: ds.attrs["cat:processing_level"] = to_level @@ -909,24 +1022,32 @@ def _search(a, b): if attrs_to_remove: for var, list_of_attrs in attrs_to_remove.items(): obj = ds if var == "global" else ds[var] - for ds_attr in list(obj.attrs.keys()): # iter over attrs in ds - for list_attr in list_of_attrs: # check if we want to remove attrs - if _search(list_attr, ds_attr): - del obj.attrs[ds_attr] + to_remove = list( + chain.from_iterable( + [ + list(filter(re.compile(attr).fullmatch, list(obj.attrs.keys()))) + for attr in list_of_attrs + ] + ) + ) + for attr in to_remove: + del obj.attrs[attr] # delete all attrs, but the ones in the list if remove_all_attrs_except: for var, list_of_attrs in remove_all_attrs_except.items(): obj = ds if var == "global" else ds[var] - for ds_attr in list(obj.attrs.keys()): # iter over attrs in ds - delete = True # assume we should delete it - for list_attr in list_of_attrs: - if _search(list_attr, ds_attr): - delete = ( - False # if attr is on the list to not delete, don't delete - ) - if delete: - del obj.attrs[ds_attr] + to_keep = list( + chain.from_iterable( + [ + list(filter(re.compile(attr).fullmatch, list(obj.attrs.keys()))) + for attr in list_of_attrs + ] + ) + ) + to_remove = list(set(obj.attrs.keys()).difference(to_keep)) + for attr in to_remove: + del obj.attrs[attr] if add_attrs: for var, attrs in add_attrs.items(): @@ -935,19 +1056,38 @@ def _search(a, b): obj.attrs[attrname] = attrtmpl if change_attr_prefix: + if isinstance(change_attr_prefix, str): + change_attr_prefix = {"cat:": change_attr_prefix} + # Make sure that the prefixes are in the right format + chg_attr_prefix = {} + for old_prefix, new_prefix in change_attr_prefix.items(): + if not old_prefix.endswith(":"): + old_prefix += ":" + if not new_prefix.endswith(":"): + new_prefix += ":" + chg_attr_prefix[old_prefix] = new_prefix + + # Change the prefixes, but keep the order of the keys + attrs = {} for ds_attr in list(ds.attrs.keys()): - new_name = ds_attr.replace("cat:", change_attr_prefix) - if new_name: - ds.attrs[new_name] = ds.attrs.pop(ds_attr) + changed = False + for old_prefix, new_prefix in chg_attr_prefix.items(): + if ds_attr.startswith(old_prefix): + new_name = ds_attr.replace(old_prefix, new_prefix) + attrs[new_name] = ds.attrs[ds_attr] + changed = True + if not changed: + attrs[ds_attr] = ds.attrs[ds_attr] + ds.attrs = attrs return ds def publish_release_notes( style: str = "md", - file: Optional[Union[os.PathLike, StringIO, TextIO]] = None, - changes: Union[str, os.PathLike] = None, -) -> Optional[str]: + file: os.PathLike | StringIO | TextIO | None = None, + changes: str | os.PathLike | None = None, +) -> str | 
None: """Format release history in Markdown or ReStructuredText. Parameters @@ -966,18 +1106,17 @@ def publish_release_notes( Notes ----- - This function exists solely for development purposes. - Adapted from xclim.testing.utils.publish_release_notes. + This function exists solely for development purposes. Adapted from xclim.testing.utils.publish_release_notes. """ - if isinstance(changes, (str, Path)): + if isinstance(changes, str | Path): changes_file = Path(changes).absolute() else: - changes_file = Path(__file__).absolute().parents[2].joinpath("CHANGES.rst") + changes_file = Path(__file__).absolute().parents[2].joinpath("CHANGELOG.rst") if not changes_file.exists(): raise FileNotFoundError("Changes file not found in xscen file tree.") - with open(changes_file) as f: + with Path(changes_file).open(encoding="utf-8") as f: changes = f.read() if style == "rst": @@ -1009,7 +1148,7 @@ def publish_release_notes( str(grouping[0]).replace("(", r"\(").replace(")", r"\)") ) search = rf"({fixed_grouping})\n([\{level}]{'{' + str(len(grouping[1])) + '}'})" - replacement = f"{'##' if level=='-' else '###'} {grouping[0]}" + replacement = f"{'##' if level == '-' else '###'} {grouping[0]}" changes = re.sub(search, replacement, changes) link_expressions = r"[\`]{1}([\w\s]+)\s<(.+)>`\_" @@ -1021,32 +1160,34 @@ def publish_release_notes( if not file: return changes - if isinstance(file, (Path, os.PathLike)): + if isinstance(file, Path | os.PathLike): file = Path(file).open("w") print(changes, file=file) -def unstack_dates( +def unstack_dates( # noqa: C901 ds: xr.Dataset, - seasons: Optional[dict[int, str]] = None, - new_dim: str = "season", + seasons: dict[int, str] | None = None, + new_dim: str | None = None, winter_starts_year: bool = False, ): """Unstack a multi-season timeseries into a yearly axis and a season one. Parameters ---------- - ds: xr.Dataset or DataArray + ds : xr.Dataset or DataArray The xarray object with a "time" coordinate. Only supports monthly or coarser frequencies. The time axis must be complete and regular (`xr.infer_freq(ds.time)` doesn't fail). - seasons: dict, optional + seasons : dict, optional A dictionary from month number (as int) to a season name. - If not given, it is guessed from the time coord's frequency. + If not given, it is guessed from the time coordinate frequency. See notes. - new_dim: str + new_dim : str, optional The name of the new dimension. - winter_starts_year: bool + If None, the name is inferred from the frequency of the time axis. + See notes. + winter_starts_year : bool If True, the year of winter (DJF) is built from the year of January, not December. i.e. DJF made from [Dec 1980, Jan 1981, and Feb 1981] will be associated with the year 1981, not 1980. @@ -1057,14 +1198,16 @@ def unstack_dates( Notes ----- - When `season` is None, the inferred frequency determines the new coordinate: - + When `seasons` is None, the inferred frequency determines the new coordinate: - For MS, the coordinates are the month abbreviations in english (JAN, FEB, etc.) - - For ?QS-? and other ?MS frequencies, the coordinates are the initials of the months in each season. - Ex: QS-DEC (with winter_starts_year=True) : DJF, MAM, JJA, SON. + - For ?QS-? and other ?MS frequencies, the coordinates are the initials of the months in each season. Ex: QS -> DJF, MAM, JJA, SON. - For YS or YS-JAN, the new coordinate has a single value of "annual". - - For ?YS-? 
frequencies, the new coordinate has a single value of "annual-{anchor}", were "anchor" - is the abbreviation of the first month of the year. Ex: YS-JUL -> "annual-JUL". + - For ?YS-? frequencies, the new coordinate has a single value of "annual-{anchor}". Ex: YS-JUL -> "annual-JUL". + + When `new_dim` is None, the new dimension name is inferred from the frequency: + - For ?YS, ?QS frequencies or ?MS with mult > 1, the new dimension is "season". + - For MS, the new dimension is "month". + """ # Get some info about the time axis freq = xr.infer_freq(ds.time) @@ -1084,23 +1227,35 @@ def unstack_dates( f"Only monthly frequencies or coarser are supported. Got: {freq}." ) - # Fast track for annual + if new_dim is None: + if base == "M" and mult == 1: + new_dim = "month" + else: + new_dim = "season" + if base in "YA": if seasons: - seaname = seasons[first.month] + seaname = f"{seasons[first.month]}" elif anchor == "JAN": seaname = "annual" else: seaname = f"annual-{anchor}" - dso = ds.expand_dims({new_dim: [seaname]}) - dso["time"] = xr.date_range( - f"{first.year}-01-01", - f"{last.year}-01-01", - freq="YS", - calendar=calendar, - use_cftime=use_cftime, - ) - return dso + if mult > 1: + seaname = f"{mult}{seaname}" + # Fast track for annual, if nothing more needs to be done. + if winter_starts_year is False: + dso = ds.expand_dims({new_dim: [seaname]}) + dso["time"] = xr.date_range( + f"{first.year}-01-01", + f"{last.year}-01-01", + freq=f"{mult}YS", + calendar=calendar, + use_cftime=use_cftime, + ) + return dso + else: + seasons = seasons or {} + seasons.update({first.month: seaname}) if base == "M" and 12 % mult != 0: raise ValueError( @@ -1119,6 +1274,9 @@ def unstack_dates( } else: # M or MS seasons = xr.coding.cftime_offsets._MONTH_ABBREVIATIONS + else: + # Only keep the entries for the months in the data + seasons = {m: seasons[m] for m in np.unique(ds.time.dt.month)} # The ordered season names seas_list = [seasons[month] for month in sorted(seasons.keys())] @@ -1152,8 +1310,6 @@ def unstack_dates( ) def reshape_da(da): - if "time" not in da.dims: - return da # Replace (A,'time',B) by (A,'time', 'season',B) in both the new shape and the new dims new_dims = list( chain.from_iterable( @@ -1172,9 +1328,10 @@ def reshape_da(da): new_coords = dict(ds.coords) new_coords.update({"time": new_time, new_dim: seas_list}) - # put horizon in the right time dimension - if "horizon" in new_coords: - new_coords["horizon"] = reshape_da(new_coords["horizon"]) + # put other coordinates that depend on time in the new shape + for coord in new_coords: + if (coord not in ["time", new_dim]) and ("time" in ds[coord].dims): + new_coords[coord] = reshape_da(dsp[coord]) if isinstance(ds, xr.Dataset): dso = dsp.map(reshape_da, keep_attrs=True) @@ -1184,9 +1341,9 @@ def reshape_da(da): def show_versions( - file: Optional[Union[os.PathLike, StringIO, TextIO]] = None, - deps: Optional[list] = None, -) -> Optional[str]: + file: os.PathLike | StringIO | TextIO | None = None, + deps: list | None = None, +) -> str | None: """Print the versions of xscen and its dependencies. 
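A short sketch of `unstack_dates` on a quarterly series, following the notes above (toy values, QS-DEC anchor assumed):

```python
import numpy as np
import xarray as xr
import xscen as xs

time = xr.date_range("2000-12-01", periods=8, freq="QS-DEC")
ds = xr.Dataset({"tas": ("time", np.arange(8.0))}, coords={"time": time})

out = xs.utils.unstack_dates(ds, winter_starts_year=True)
# `out` has a yearly "time" axis (2001 and 2002, since DJF is tied to January's year)
# and a new "season" dimension holding DJF, MAM, JJA and SON, as described in the notes.
```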
Parameters @@ -1217,11 +1374,13 @@ def show_versions( "intake_esm", "matplotlib", "netCDF4", + "numcodecs", "numpy", "pandas", "parse", "pyyaml", "rechunker", + "scipy", "shapely", "sparse", "toolz", @@ -1229,26 +1388,53 @@ def show_versions( "xclim", "xesmf", "zarr", - # For translations - "babel", # Opt "nc-time-axis", "pyarrow", - # Extras specific to this function - "fastprogress", - "intake", - "pydantic", - "requests", - "xcollection", - "yaml", + # Dev + "babel", + "black", + "blackdoc", + "bump-my-version", + "coverage", + "coveralls", + "flake8", + "flake8-rst-docstrings", + "ipykernel", + "ipython", + "isort", + "jupyter_client", + "nbsphinx", + "nbval", + "pandoc", + "pooch", + "pre-commit", + "pytest", + "pytest-cov", + "ruff", + "setuptools", + "setuptools-scm", + "sphinx", + "sphinx-autoapi", + "sphinx-rtd-theme", + "sphinxcontrib-napoleon", + "sphinx-codeautolink", + "sphinx-copybutton", + "sphinx-mdinclude", + "watchdog", + "xdoctest", + "tox", + "build", + "wheel", + "pip", + "flake8-alphabetize", ] return _show_versions(file=file, deps=deps) def ensure_correct_time(ds: xr.Dataset, xrfreq: str) -> xr.Dataset: - """ - Ensure a dataset has the correct time coordinate, as expected for the given frequency. + """Ensure a dataset has the correct time coordinate, as expected for the given frequency. Daily or finer datasets are "floored" even if `xr.infer_freq` succeeds. Errors are raised if the number of data points per period is not 1. @@ -1268,17 +1454,17 @@ def ensure_correct_time(ds: xr.Dataset, xrfreq: str) -> xr.Dataset: "Dataset is labelled as having a sampling frequency of " f"{xrfreq}, but some periods have more than one data point." ) - if (counts.isnull()).any().item(): + if (counts.isnull() | (counts == 0)).any().item(): raise ValueError( - "The resampling count contains nans. There might be some missing data." + "The resampling count contains NaNs or 0s. There might be some missing data." ) ds["time"] = counts.time return ds def standardize_periods( - periods: Optional[Union[list[str], list[list[str]]]], multiple: bool = True -) -> Optional[Union[list[str], list[list[str]]]]: + periods: list[str] | list[list[str]] | None, multiple: bool = True +) -> list[str] | list[list[str]] | None: """Reformats the input to a list of strings, ['start', 'end'], or a list of such lists. Parameters @@ -1315,8 +1501,8 @@ def standardize_periods( return periods[0] -def season_sort_key(idx: pd.Index, name: Optional[str] = None): - """Get a proper sort key for a "season" or "month" index to avoid alphabetical sorting. +def season_sort_key(idx: pd.Index, name: str | None = None): + """Get a proper sort key for a "season" or "month" index to avoid alphabetical sorting. If any of the values in the index is not recognized as a 3-letter season code or a 3-letter month abbreviation, the operation is @@ -1344,10 +1530,10 @@ def season_sort_key(idx: pd.Index, name: Optional[str] = None): if (name or getattr(idx, "name", None)) == "month": m = list(xr.coding.cftime_offsets._MONTH_ABBREVIATIONS.values()) return idx.map(m.index) - except (TypeError, ValueError): + except (TypeError, ValueError) as err: # ValueError if string not in seasons, or value not in months # TypeError if season element was not a string. 
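For completeness, the expected behaviour of `standardize_periods`, as described in its docstring (a sketch, not a test):

```python
import xscen as xs

xs.utils.standardize_periods(["1981", "2010"])
# expected: [["1981", "2010"]] — a list of ['start', 'end'] lists when multiple=True (default)

xs.utils.standardize_periods(["1981", "2010"], multiple=False)
# expected: ["1981", "2010"] — a single ['start', 'end'] list
```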
- pass + logging.error(err) return idx @@ -1416,3 +1602,11 @@ def _xarray_defaults(**kwargs): "data_vars", "minimal" ) return kwargs + + +def rechunk_for_resample(obj: xr.DataArray | xr.Dataset, **resample_kwargs): + if not uses_dask(obj): + return obj + + res = obj.resample(**resample_kwargs) + return flox.xarray.rechunk_for_blockwise(obj, res._dim, res._codes) diff --git a/xscen/xclim_modules/__init__.py b/src/xscen/xclim_modules/__init__.py similarity index 100% rename from xscen/xclim_modules/__init__.py rename to src/xscen/xclim_modules/__init__.py diff --git a/xscen/xclim_modules/conversions.py b/src/xscen/xclim_modules/conversions.py similarity index 100% rename from xscen/xclim_modules/conversions.py rename to src/xscen/xclim_modules/conversions.py diff --git a/xscen/xclim_modules/conversions.yml b/src/xscen/xclim_modules/conversions.yml similarity index 100% rename from xscen/xclim_modules/conversions.yml rename to src/xscen/xclim_modules/conversions.yml diff --git a/templates/1-basic_workflow_with_config/config1.yml b/templates/1-basic_workflow_with_config/config1.yml index 9d4f7f1d..e66ab33a 100644 --- a/templates/1-basic_workflow_with_config/config1.yml +++ b/templates/1-basic_workflow_with_config/config1.yml @@ -138,6 +138,7 @@ extract: - '1950' - '2100' other_search_criteria: # put the simulations you want here + processing_level: raw mip_era: CMIP6 experiment: - ssp245 @@ -326,7 +327,7 @@ cleanup: tasmin: degC pr: mm d-1 convert_calendar_kwargs: - target: standard + calendar: standard align_on: random missing_by_var: tasmax: interpolate @@ -477,7 +478,7 @@ aggregate: op: mean window: 30 stride: 10 - periods: [['1951', '2100']] + periods: [ [ '1951', '2100' ] ] to_level: climatology #periods_as_dim: True #min_periods: @@ -540,7 +541,7 @@ logging: # general logging args class : logging.StreamHandler formatter: default level : INFO -# file: +# file: #Uncomment if you want a log file, don't forget to also uncomment the filename in paths1.yml. # class: logging.FileHandler # formatter: default # level : DEBUG @@ -548,11 +549,11 @@ logging: # general logging args xscen: propagate: False level: INFO - handlers: [console] # [file, console] could also be used to write the log to a file + handlers: [ console ] # [file, console] could also be used to write the log to a file xclim: # Options for xclim - metadata_locales: # Enable french translation for xclim indicators, but also some xscen methods. + metadata_locales: # Enable French translation for xclim indicators, but also some xscen methods. 
- fr diff --git a/templates/1-basic_workflow_with_config/paths1_example.yml b/templates/1-basic_workflow_with_config/paths1_example.yml index 217ef9a4..4521a8ee 100644 --- a/templates/1-basic_workflow_with_config/paths1_example.yml +++ b/templates/1-basic_workflow_with_config/paths1_example.yml @@ -13,11 +13,11 @@ extract: reconstruction: search_data_catalogs: data_catalogs: - - PATH_TO_OFFICIAL_CATALOGUES/reconstruction.json + - PATH_TO_OFFICIAL_CATALOGUES/reconstruction.json simulation: search_data_catalogs: data_catalogs: - - PATH_TO_OFFICIAL_CATALOGUES/simulation.json + - PATH_TO_OFFICIAL_CATALOGUES/simulation.json regrid: regrid_dataset: @@ -54,19 +54,19 @@ dask: local_directory: DASK_PATH dashboard_address: YOUR_RANDOM_NUMBER -logging: - handlers: - file: - filename: PATH/logger.log +#logging: #Uncomment if you want a log file +# handlers: +# file: +# filename: PATH/logger.log utils: - stack_drop_nans: - to_file: &coords - PATH/stack_coords/coords_{domain}_{shape}.nc - unstack_fill_nan: - coords: *coords - maybe_unstack: - coords: *coords + stack_drop_nans: + to_file: &coords + PATH/stack_coords/coords_{domain}_{shape}.nc + unstack_fill_nan: + coords: *coords + maybe_unstack: + coords: *coords scripting: send_mail: diff --git a/templates/1-basic_workflow_with_config/workflow1.py b/templates/1-basic_workflow_with_config/workflow1.py index ac8f70a4..70824b6b 100644 --- a/templates/1-basic_workflow_with_config/workflow1.py +++ b/templates/1-basic_workflow_with_config/workflow1.py @@ -2,6 +2,7 @@ import atexit import logging +from pathlib import Path import xarray as xr from dask import config as dskconf @@ -37,8 +38,10 @@ # Copy config to the top of the log file if "logging" in CONFIG and "file" in CONFIG["logging"]["handlers"]: - f1 = open(CONFIG["logging"]["handlers"]["file"]["filename"], "a+") - f2 = open("config1.yml") + f1 = Path(CONFIG["logging"]["handlers"]["file"]["filename"], "a+").open( + encoding="utf-8" + ) + f2 = Path("config1.yml").open(encoding="utf-8") f1.write(f2.read()) f1.close() f2.close() @@ -64,7 +67,7 @@ if "extract" in CONFIG["tasks"]: # Iterate on types of data to extract (reconstruction, simulation) # and get the respective dictionary from the config - for source_type, type_dict in CONFIG["extract"].items(): + for type_dict in CONFIG["extract"].values(): # Filter the catalog to get only the datasets that match the arguments in the config. # Arguments are not passed automatically, because the config is different for each type of data. # Therefore, we must manually send the 'type_dict' entry to the search_data_catalogs function. @@ -105,7 +108,8 @@ ds, ds[list(ds.data_vars)[0]] .isel(time=0, drop=True) - .notnull(), + .notnull() + .compute(), ) # Prepare the filename for the zarr file, using the format specified in paths1.yml path = CONFIG["paths"]["task"].format(**cur) @@ -169,7 +173,7 @@ for var, ba_dict in CONFIG["biasadjust"].items(): # Search the ProjectCatalog for the results of the previous step, then iterate over each dataset. dict_sim = pcat.search(**ba_dict["sim_inputs"]).to_dataset_dict(**tdd) - for id_sim, ds_sim in dict_sim.items(): + for ds_sim in dict_sim.values(): cur = { "id": ds_sim.attrs["cat:id"], "xrfreq": ds_sim.attrs["cat:xrfreq"], @@ -285,7 +289,7 @@ if "diagnostics" in CONFIG["tasks"]: # The properties and measures that we want to compute are different for each type of data (ref, sim, scen), # so we need to iterate over them. 
- for kind, kind_dict in CONFIG["diagnostics"]["kind"].items(): + for kind_dict in CONFIG["diagnostics"]["kind"].values(): # Search for the right datasets and iterate over them dict_input = pcat.search(**kind_dict["inputs"]).to_dataset_dict(**tdd) for key_input, ds_input in dict_input.items(): @@ -335,7 +339,7 @@ meas_dict = pcat.search(processing_level="diag-measures-sim").to_dataset_dict( **tdd ) - for id_meas, ds_meas_sim in meas_dict.items(): + for ds_meas_sim in meas_dict.values(): cur = { "id": ds_meas_sim.attrs["cat:id"], "processing_level": "diag-improved", diff --git a/templates/2-indicators_only/config2.yml b/templates/2-indicators_only/config2.yml index 03f4427b..b65fb341 100644 --- a/templates/2-indicators_only/config2.yml +++ b/templates/2-indicators_only/config2.yml @@ -2,36 +2,36 @@ ## Comments starting with a single # are example/suggested entries ## Descriptive comments start with ##. dask: - client: - n_workers: 3 - threads_per_worker: 4 - memory_limit: 10GB - # dashboard_address: 11111 - # silence_logs: 50 # To supress warnings about garbage collection and other inevitable stuff + client: + n_workers: 3 + threads_per_worker: 4 + memory_limit: 10GB + # dashboard_address: 11111 + # silence_logs: 50 # To supress warnings about garbage collection and other inevitable stuff array.slicing.split_large_chunks: False logging: - formatters: - default: - format: '%(asctime)s %(levelname)-8s %(name)-15s %(message)s' - datefmt: '%Y-%m-%d %H:%M:%S' - handlers: - console: - class : logging.StreamHandler - formatter: default - level : DEBUG - loggers: - workflow: - level: INFO - propagate: False - handlers: [console] - xscen: - level: INFO - propagate: False - handlers: [console] - root: - level: INFO - handlers: [console] + formatters: + default: + format: '%(asctime)s %(levelname)-8s %(name)-15s %(message)s' + datefmt: '%Y-%m-%d %H:%M:%S' + handlers: + console: + class : logging.StreamHandler + formatter: default + level : DEBUG + loggers: + workflow: + level: INFO + propagate: False + handlers: [ console ] + xscen: + level: INFO + propagate: False + handlers: [ console ] + root: + level: INFO + handlers: [ console ] xclim: ## Enable french translation for xclim indicators, but also some xscen methods. @@ -41,54 +41,54 @@ xclim: cf_compliance: log main: - ## Path to a project catalog, the workflow creates it if needed - catalog: - ## The template of the file name, including the parent path, valid fields are the catalog's column - ## One zarr dataset per xrfreq is produced - # outfilename: /DATA/{source}_indicators_{xrfreq}_{date_start:%Y}-{date_end:%Y}.zarr - outfilename: + ## Path to a project catalog, the workflow creates it if needed + catalog: + ## The template of the file name, including the parent path, valid fields are the catalog's column + ## One zarr dataset per xrfreq is produced + # outfilename: /DATA/{source}_indicators_{xrfreq}_{date_start:%Y}-{date_end:%Y}.zarr + outfilename: indicators: - ## Path (careful : this is relative to where the script is called) - module: indicators2.yml + ## Path (careful : this is relative to where the script is called) + module: indicators2.yml extract: - ## Arguments to select the dataset and extract it. 
- ## Example args as comments, good defaults uncommented - search_data_catalogs: - data_catalogs: - # - /DATA/reconstruction.json - variables_and_freqs: - tas: D - tasmax: D - tasmin: D - pr: D - other_search_criteria: - # source: ERA5-Land - # domain: NAM - allow_conversion: True # Mainly for (tasmax, tasmin)-> tas - allow_resampling: True # To get from hourly to daily + ## Arguments to select the dataset and extract it. + ## Example args as comments, good defaults uncommented + search_data_catalogs: + data_catalogs: + # - /DATA/reconstruction.json + variables_and_freqs: + tas: D + tasmax: D + tasmin: D + pr: D + other_search_criteria: + # source: ERA5-Land + # domain: NAM + allow_conversion: True # Mainly for (tasmax, tasmin)-> tas + allow_resampling: True # To get from hourly to daily - extract_dataset: - ## This might not need any arguments - ## But a region might be of interest: - # region : - # name: region_name - # method: bbox - # lat_bnds: [45, 47] - # lon_bnds: [-75, -70] + extract_dataset: + ## This might not need any arguments + ## But a region might be of interest: + # region : + # name: region_name + # method: bbox + # lat_bnds: [45, 47] + # lon_bnds: [-75, -70] io: - save_to_zarr: - # Mode o will remove existing variables in the zarr, the script already checks what was already computed through the catalog. - # but the dataset itself and other variables are preserved. - mode: o - itervar: False # if the computation is too slow, write one indicator at a time by turning this to True. - rechunk: # Normal chunking of daily data would be too small for indicators, one only usually needs to change the time chunk - time: 40 + save_to_zarr: + # Mode o will remove existing variables in the zarr, the script already checks what was already computed through the catalog. + # but the dataset itself and other variables are preserved. + mode: o + itervar: False # if the computation is too slow, write one indicator at a time by turning this to True. + rechunk: # Normal chunking of daily data would be too small for indicators, one only usually needs to change the time chunk + time: 40 scripting: - send_main_on_exit: - subject: "Indicator computing terminated." - msg_ok: "🥳 Everything went well." - msg_err: "🔥 Something went wrong while computing the indicators. 🔥" + send_main_on_exit: + subject: "Indicator computing terminated." + msg_ok: "🥳 Everything went well." + msg_err: "🔥 Something went wrong while computing the indicators. 🔥" diff --git a/templates/2-indicators_only/workflow2.py b/templates/2-indicators_only/workflow2.py index 6234656f..e31a9a62 100644 --- a/templates/2-indicators_only/workflow2.py +++ b/templates/2-indicators_only/workflow2.py @@ -61,12 +61,13 @@ to_compute.append((name, ind)) if not to_compute: - logger.info(f"Everything computed for {dsid}.") + msg = f"Everything computed for {dsid}." 
+ logger.info(msg) continue outd = compute_indicators(ds, indicators=to_compute, to_level="indicators") - for freq, outds in outd.items(): + for outds in outd.values(): outpath = CONFIG["main"]["outfilename"].format(**get_cat_attrs(outds)) save_to_zarr(outds, outpath) pcat.update_from_ds(outds, path=outpath) diff --git a/tests/test_aggregate.py b/tests/test_aggregate.py index 0d7f10d3..fde0d965 100644 --- a/tests/test_aggregate.py +++ b/tests/test_aggregate.py @@ -14,21 +14,8 @@ xe = None -class TestClimatologicalMean: - def test_future_warning(self): - ds = timeseries( - np.tile(np.arange(1, 13), 3), - variable="tas", - start="2001-01-01", - freq="MS", - as_dataset=True, - ) - with pytest.warns(FutureWarning): - xs.climatological_mean(ds) - - class TestComputeDeltas: - ds = xs.climatological_mean( + ds = xs.climatological_op( timeseries( np.repeat(np.arange(1, 5), 30).astype(float), variable="tas", @@ -36,8 +23,10 @@ class TestComputeDeltas: freq="YS", as_dataset=True, ), + op="mean", window=30, - interval=30, + stride=30, + rename_variables=False, ) @pytest.mark.parametrize( @@ -108,7 +97,7 @@ def test_input_ds(self): def test_freqs(self, xrfreq): o = 12 if xrfreq == "MS" else 4 if xrfreq == "QS" else 1 - ds = xs.climatological_mean( + ds = xs.climatological_op( timeseries( np.repeat(np.arange(1, 5), 30 * o).astype(float), variable="tas", @@ -116,8 +105,10 @@ def test_freqs(self, xrfreq): freq=xrfreq, as_dataset=True, ), + op="mean", window=30, - interval=30, + stride=30, + rename_variables=False, ) out = xs.compute_deltas( @@ -376,12 +367,6 @@ def test_to_level(self): assert out.attrs["cat:processing_level"] == "warminglevel+1Cvs1851-1901" def test_errors(self): - # FutureWarning - with pytest.warns(FutureWarning, match="The 'period' argument is deprecated"): - xs.produce_horizon( - self.ds, indicators=self.yaml_file, period=["1982", "1988"] - ) - # Bad input with pytest.raises( ValueError, match="Could not understand the format of warminglevels" diff --git a/tests/test_biasadjust.py b/tests/test_biasadjust.py index 5a51d6c0..b80ccdb2 100644 --- a/tests/test_biasadjust.py +++ b/tests/test_biasadjust.py @@ -47,9 +47,7 @@ def test_basic_train(self, var, period): def test_preprocess(self): - dref360 = xc.core.calendar.convert_calendar( - self.dref, "360_day", align_on="year" - ) + dref360 = self.dref.convert_calendar("360_day", align_on="year") out = xs.train( dref360, @@ -131,7 +129,7 @@ def test_basic( self, periods, to_level, bias_adjust_institution, bias_adjust_project ): dtrain = xs.train( - self.dref, + self.dref.copy(), self.dsim.sel(time=slice("2001", "2003")), var="tas", period=["2001", "2003"], @@ -139,7 +137,7 @@ def test_basic( out = xs.adjust( dtrain, - self.dsim, + self.dsim.copy(), periods=periods, to_level=to_level, bias_adjust_institution=bias_adjust_institution, @@ -154,7 +152,7 @@ def test_basic( "name='time.dayofyear', window=31), kind='+'" ").adjust(sim, )" ) - assert xc.core.calendar.get_calendar(out) == "noleap" + assert out.time.dt.calendar == "noleap" if bias_adjust_institution is not None: assert out.attrs["cat:bias_adjust_institution"] == "i" @@ -177,9 +175,9 @@ def test_basic( np.concatenate([np.ones(365 * 1) * 1, np.ones(365 * 1) * 3]), ) - def test_write_train(self): + def test_write_train(self, tmpdir): dtrain = xs.train( - self.dref, + self.dref.copy(), self.dsim.sel(time=slice("2001", "2003")), var="tas", period=["2001", "2003"], @@ -188,7 +186,7 @@ def test_write_train(self): jitter_under={"thresh": "2 K"}, ) - root = str(notebooks / "_data") + root = 
str(tmpdir / "_data") xs.save_to_zarr(dtrain, f"{root}/test.zarr", mode="o") dtrain2 = xr.open_dataset( f"{root}/test.zarr", chunks={"dayofyear": 365, "quantiles": 15} @@ -196,7 +194,7 @@ def test_write_train(self): out = xs.adjust( dtrain, - self.dsim, + self.dsim.copy(), periods=["2001", "2006"], xclim_adjust_args={ "detrend": { @@ -207,7 +205,7 @@ def test_write_train(self): out2 = xs.adjust( dtrain2, - self.dsim, + self.dsim.copy(), periods=["2001", "2006"], xclim_adjust_args={ "detrend": { @@ -291,15 +289,9 @@ def test_xclim_vs_xscen( with xc.set_options(sdba_extra_output=True): group = xc.sdba.Grouper(group="time.dayofyear", window=31) - drefx = xc.core.calendar.convert_calendar( - dref.sel(time=slice("2001", "2003")), "noleap" - ) - dhistx = xc.core.calendar.convert_calendar( - dhist.sel(time=slice("2001", "2003")), "noleap" - ) - dsimx = xc.core.calendar.convert_calendar( - dsim.sel(time=slice("2001", "2006")), "noleap" - ) + drefx = dref.sel(time=slice("2001", "2003")).convert_calendar("noleap") + dhistx = dhist.sel(time=slice("2001", "2003")).convert_calendar("noleap") + dsimx = dsim.sel(time=slice("2001", "2006")).convert_calendar("noleap") dhist_ad, pth, dP0 = xc.sdba.processing.adapt_freq( drefx["pr"], dhistx["pr"], group=group, thresh="1 mm d-1" diff --git a/tests/test_catalog.py b/tests/test_catalog.py index abb324ac..8895c25d 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -1,7 +1,10 @@ +from pathlib import Path + import pandas as pd +import xarray as xr from conftest import SAMPLES_DIR -from xscen import catalog +from xscen import catalog, extract def test_subset_file_coverage(): @@ -35,3 +38,58 @@ def test_subset_file_coverage(): def test_xrfreq_fix(): cat = catalog.DataCatalog(SAMPLES_DIR.parent / "pangeo-cmip6.json") assert set(cat.df.xrfreq) == {"3h", "D", "fx"} + + +class TestCopyFiles: + def test_flat(self, samplecat, tmp_path): + newcat = samplecat.copy_files(tmp_path, flat=True) + assert len(list(tmp_path.glob("*.nc"))) == len(newcat.df) + + def test_inplace(self, samplecat, tmp_path): + dsid, scat = extract.search_data_catalogs( + data_catalogs=[samplecat], + variables_and_freqs={"tas": "MS"}, + allow_resampling=True, + other_search_criteria={ + "experiment": "ssp585", + "source": "NorESM.*", + "member": "r1i1p1f1", + }, + ).popitem() + scat.copy_files(tmp_path, inplace=True) + assert len(list(tmp_path.glob("*.nc"))) == len(scat.df) + + _, ds = extract.extract_dataset(scat).popitem() + frq = xr.infer_freq(ds.time) + assert frq == "MS" + + def test_zipunzip(self, samplecat, tmp_path): + dsid, scat = extract.search_data_catalogs( + data_catalogs=[samplecat], + variables_and_freqs={"tas": "D"}, + allow_resampling=True, + other_search_criteria={ + "experiment": "ssp585", + "source": "NorESM.*", + "member": "r1i1p1f1", + }, + ).popitem() + _, ds = extract.extract_dataset(scat).popitem() + ds.to_zarr(tmp_path / "temp.zarr") + scat.esmcat.df.loc[0, "path"] = tmp_path / "temp.zarr" + + rz = tmp_path / "zipped" + rz.mkdir() + scat_z = scat.copy_files(rz, zipzarr=True) + f = Path(scat_z.df.path.iloc[0]) + assert f.suffix == ".zip" + assert f.parent.name == rz.name + assert f.is_file() + + ru = tmp_path / "unzipped" + ru.mkdir() + scat_uz = scat.copy_files(ru, unzip=True) + f = Path(scat_uz.df.path.iloc[0]) + assert f.suffix == ".zarr" + assert f.parent.name == ru.name + assert f.is_dir() diff --git a/tests/test_catutils.py b/tests/test_catutils.py index 6748c3c2..8d5d792b 100644 --- a/tests/test_catutils.py +++ b/tests/test_catutils.py @@ -20,7 +20,7 @@ 
[{".nc", ".zarr"}, {6, 7, 8}, "*ssp126*", 2], ), ) -def test_find_assets(exts, lens, dirglob, N): +def test_find_assets(exts, lens, dirglob, N): # noqa: N803 finder = cu._find_assets(str(SAMPLES_DIR), exts=exts, lengths=lens, dirglob=dirglob) assert isinstance(finder, Generator) assert len(list(finder)) == N @@ -228,6 +228,14 @@ def test_build_path(samplecat): ) in df.new_path.values +def test_pattern_from_schema(samplecat): + df = cu.build_path(samplecat, mip_era="CMIP5") + patts = cu.patterns_from_schema("original-sims-raw") + for p in df.new_path.values: + res = [cu._compile_pattern(patt).parse(p) for patt in patts] + assert any(res) + + def test_build_path_ds(): ds = xr.tutorial.open_dataset("air_temperature") ds = ds.assign(time=xr.cftime_range("0001-01-01", freq="6h", periods=ds.time.size)) diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py index 6cbe9d58..6aa596e2 100644 --- a/tests/test_ensembles.py +++ b/tests/test_ensembles.py @@ -5,16 +5,26 @@ import pytest import xarray as xr import xclim as xc +from packaging.version import Version try: import xesmf as xe except ImportError: xe = None +# temp fix for changes to xclim-testdata +from functools import partial + from xclim.testing import open_dataset from xclim.testing.helpers import test_timeseries as timeseries import xscen as xs +# FIXME: Remove if-else when updating minimum xclim version to 0.53 +if Version(xc.__version__) < Version("0.53.0"): + # Hack to revert to old testdata with old xclim + open_dataset = partial(open_dataset, branch="v2023.12.14") + + LOGGER = logging.getLogger(__name__) diff --git a/tests/test_extract.py b/tests/test_extract.py index fc8c0569..0b9d0eff 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -364,14 +364,14 @@ def test_wrong_types(self): "CMIP6_CanESM5_ssp585_r1i1p1f1", wl=2, window=3.85, return_horizon=True ) - def test_DataArray(self): + def test_DataArray(self): # noqa: N802 reals = xr.DataArray( ["CMIP6_CanESM5_ssp126_r1i1p1f1"], dims=("x",), coords={"x": [1]} ) out = xs.get_warming_level(reals, wl=2, return_horizon=False) xr.testing.assert_identical(out, reals.copy(data=["2026"])) - def test_DataFrame(self): + def test_DataFrame(self): # noqa: N802 reals = pd.DataFrame.from_records( [ { diff --git a/tests/test_indicators.py b/tests/test_indicators.py index 3b7dd0b2..79878eb0 100644 --- a/tests/test_indicators.py +++ b/tests/test_indicators.py @@ -208,3 +208,18 @@ def test_select_inds_for_avail_vars(self, indicator_iter): ) assert len(list(inds_for_avail_vars.iter_indicators())) == 0 assert [(n, i) for n, i in inds_for_avail_vars.iter_indicators()] == [] + + +@pytest.mark.parametrize( + "ind,expvars,expfrq", + [ + ("wind_vector_from_speed", ["uas", "vas"], "D"), + ("fit", ["params"], "fx"), + ("tg_mean", ["tg_mean"], "YS-JAN"), + ], +) +def test_get_indicator_outputs(ind, expvars, expfrq): + ind = xclim.core.indicator.registry[ind.upper()].get_instance() + outvars, outfrq = xs.indicators.get_indicator_outputs(ind, "D") + assert outvars == expvars + assert outfrq == expfrq diff --git a/tests/test_io.py b/tests/test_io.py index a34c2880..7db611c9 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,6 +1,7 @@ import numpy as np import pytest import xarray as xr +import xclim as xc import xscen as xs @@ -90,12 +91,24 @@ class TestToTable: .reset_index("site") .assign_coords(site=list("abcdef")) ).transpose("season", "time", "site") + ds.attrs = {"foo": "bar", "baz": 1, "qux": 2.0} + + @pytest.mark.parametrize( + "multiple, as_dataset", [(True, True), (False, 
True), (False, False)] + ) + def test_normal(self, multiple, as_dataset): + if multiple is False: + if as_dataset: + ds = self.ds[["tas"]].copy() + else: + ds = self.ds["tas"].copy() + else: + ds = self.ds.copy() - def test_normal(self): # Default - tab = xs.io.to_table(self.ds) - assert tab.shape == (120, 5) # 3 vars + 2 aux coords - assert tab.columns.names == ["variable"] + tab = xs.io.to_table(ds) + assert tab.shape == (120, 5 if multiple else 3) # 3 vars + 2 aux coords + assert tab.columns.names == ["variable"] if multiple else [None] assert tab.index.names == ["season", "time", "site"] # Season order is chronological, rather than alphabetical np.testing.assert_array_equal( @@ -105,15 +118,91 @@ def test_normal(self): ["JFM", "AMJ", "JAS", "OND"], ) - # Variable in the index, thus no coords + if multiple: + # Variable in the index, thus no coords + tab = xs.io.to_table( + ds, row=["time", "variable"], column=["season", "site"], coords=False + ) + assert tab.shape == (15, 24) + assert tab.columns.names == ["season", "site"] + np.testing.assert_array_equal( + tab.loc[("1993", "pr"), ("JFM",)], ds.pr.sel(time="1993", season="JFM") + ) + # Ensure that the coords are not present + assert ( + len( + set(tab.index.get_level_values("variable").unique()).difference( + ["tas", "pr", "snw"] + ) + ) + == 0 + ) + + def test_sheet(self): tab = xs.io.to_table( - self.ds, row=["time", "variable"], column=["season", "site"], coords=False - ) - assert tab.shape == (15, 24) - assert tab.columns.names == ["season", "site"] - np.testing.assert_array_equal( - tab.loc[("1993", "pr"), ("JFM",)], self.ds.pr.sel(time="1993", season="JFM") + self.ds, + row=["time", "variable"], + column=["season"], + sheet="site", + coords=False, ) + assert set(tab.keys()) == {("a",), ("b",), ("c",), ("d",), ("e",), ("f",)} + assert tab[("a",)].shape == (15, 4) # 5 time * 3 variable X 4 season + + def test_error(self): + with pytest.raises(ValueError, match="Repeated dimension names."): + xs.io.to_table( + self.ds, row=["time", "variable"], column=["season", "site", "time"] + ) + with pytest.raises(ValueError, match="Passed row, column and sheet"): + xs.io.to_table( + self.ds, row=["time", "variable"], column=["season", "site", "foo"] + ) + with pytest.raises( + NotImplementedError, + match="Keeping auxiliary coords is not implemented when", + ): + xs.io.to_table( + self.ds, + row=["time", "variable"], + column=["season", "site"], + coords=True, + ) + + @pytest.mark.parametrize("as_dataset", [True, False]) + def test_make_toc(self, as_dataset): + ds = self.ds.copy() + for v in ds.data_vars: + ds[v].attrs["long_name"] = f"Long name for {v}" + ds[v].attrs["long_name_fr"] = f"Nom long pour {v}" + + if as_dataset is False: + ds = ds["tas"] + + with xc.set_options(metadata_locales="fr"): + toc = xs.io.make_toc(ds) + + if as_dataset: + assert toc.shape == (8, 2) + assert toc.columns.tolist() == ["Description", "Unités"] + assert toc.index.tolist() == [ + "tas", + "pr", + "snw", + "", + "Attributs globaux", + "foo", + "baz", + "qux", + ] + assert toc.loc["tas", "Description"] == "Nom long pour tas" + assert toc.loc["tas", "Unités"] == "K" + else: + assert toc.shape == (1, 2) + assert toc.columns.tolist() == ["Description", "Unités"] + assert toc.index.tolist() == ["tas"] + assert toc.loc["tas", "Description"] == "Nom long pour tas" + assert toc.loc["tas", "Unités"] == "K" def test_round_bits(datablock_3d): diff --git a/tests/test_regrid.py b/tests/test_regrid.py index af8a9a33..0b725535 100644 --- a/tests/test_regrid.py +++ 
b/tests/test_regrid.py @@ -1,51 +1,383 @@ +import hashlib +from pathlib import Path + import numpy as np import pytest +import xarray as xr try: import xesmf as xe except ImportError: xe = None -from xscen.regrid import create_bounds_rotated_pole, regrid_dataset +import xscen as xs from xscen.testing import datablock_3d -def test_create_bounds_rotated_pole(): - ds = datablock_3d( - np.zeros((20, 10, 10)), +class TestCreateBoundsGridmapping: + def test_create_bounds_rotated_pole(self): + ds = datablock_3d( + np.zeros((20, 10, 10)), + "tas", + "rlon", + -5, + "rlat", + 80.5, + 1, + 1, + "2000-01-01", + as_dataset=True, + ) + bnds = xs.regrid.create_bounds_gridmapping(ds, "rotated_pole") + np.testing.assert_allclose(bnds.lon_bounds[-1, -1, 1], 83) + np.testing.assert_allclose(bnds.lat_bounds[-1, -1, 1], 42.5) + + assert xs.regrid.create_bounds_rotated_pole(ds).equals(bnds) + + def test_create_bounds_oblique(self): + ds = datablock_3d( + np.zeros((20, 10, 10)), + "tas", + "x", + -5000, + "y", + 5000, + 100000, + 100000, + "2000-01-01", + as_dataset=True, + ) + bnds = xs.regrid.create_bounds_gridmapping(ds, "oblique_mercator") + np.testing.assert_allclose(bnds.lon_bounds[-1, -1, -1], -48.98790806) + np.testing.assert_allclose(bnds.lat_bounds[-1, -1, -1], 52.9169163) + + def test_error(self): + ds = datablock_3d( + np.zeros((20, 10, 10)), + "tas", + "x", + -5000, + "y", + 5000, + 100000, + 100000, + "2000-01-01", + as_dataset=True, + ) + with pytest.raises(NotImplementedError): + xs.regrid.create_bounds_gridmapping(ds, "lambert_conformal_conic") + + +@pytest.mark.skipif(xe is None, reason="xesmf needed for testing regrdding") +class TestRegridDataset: + @staticmethod + def compute_file_hash(file_path): + """Compute the SHA-256 hash of the specified file.""" + sha256 = hashlib.sha256() + with Path(file_path).open("rb") as f: + for block in iter(lambda: f.read(4096), b""): + sha256.update(block) + return sha256.hexdigest() + + dsin_reg = datablock_3d( + np.zeros((10, 6, 6)), + "tas", + "lon", + -142, + "lat", + 0, + 2, + 2, + "2000-01-01", + as_dataset=True, + ) + dsin_reg = dsin_reg.chunk({"lon": 3, "time": 1}) + dsin_reg.attrs["cat:id"] = "CMIP5" + dsin_reg.attrs["cat:member"] = "MIP5" + dsin_reg.attrs["cat:domain"] = "Global" + + dsout_rp = datablock_3d( + np.zeros((2, 10, 10)), "tas", "rlon", -5, "rlat", - 80.5, + -5, 1, 1, "2000-01-01", as_dataset=True, ) - bnds = create_bounds_rotated_pole(ds) - np.testing.assert_allclose(bnds.lon_bounds[-1, -1, 1], 83) - np.testing.assert_allclose(bnds.lat_bounds[-1, -1, 1], 42.5) + dsout_rp.attrs["cat:id"] = "CORDEX" + dsout_rp.attrs["cat:domain"] = "RegionEssai" - -@pytest.mark.skipif(xe is None, reason="xesmf needed for testing regrdding") -class TestRegridDataset: def test_simple(self, tmp_path): - dsout = datablock_3d( - np.zeros((2, 10, 10)), - "tas", - "rlon", - -5, - "rlat", - -5, + out = xs.regrid_dataset( + self.dsin_reg, + self.dsout_rp, + weights_location=tmp_path / "weights", + regridder_kwargs={ + "method": "patch", + "output_chunks": {"rlon": 5}, + "unmapped_to_nan": True, + }, + ) + + assert ( + tmp_path / "weights" / "C_Global_CORDEX_RegionEssai_regrid0patch.nc" + ).is_file() + assert out.tas.attrs["grid_mapping"] == "rotated_pole" + assert out.rotated_pole.attrs == self.dsout_rp.rotated_pole.attrs + assert "patch" in out.attrs["history"] + assert out.attrs["cat:processing_level"] == "regridded" + assert out.chunks["rlon"] == (5, 5) + + hash1 = self.compute_file_hash( + tmp_path / "weights" / "C_Global_CORDEX_RegionEssai_regrid0patch.nc" + 
) + xs.regrid_dataset( + self.dsin_reg, + self.dsout_rp, + weights_location=tmp_path / "weights", + regridder_kwargs={ + "method": "patch", + "output_chunks": {"rlon": 5}, + "unmapped_to_nan": True, + }, + ) + # Check that the weights are not recomputed + hash2 = self.compute_file_hash( + tmp_path / "weights" / "C_Global_CORDEX_RegionEssai_regrid0patch.nc" + ) + assert hash1 == hash2 + + def test_mask(self): + ds_in = self.dsin_reg.copy() + ds_in["tas"].loc[dict(lon=-142, lat=0)] = 999999 + ds_in["mask"] = xr.ones_like(ds_in.tas.isel(time=0)) + ds_in["mask"].loc[dict(lon=-142, lat=0)] = 0 + + grid = xe.util.cf_grid_2d(-140, -134, 1, 2, 8, 1) + grid["mask"] = xr.DataArray(np.ones((6, 6)), dims=("lat", "lon")) + grid["mask"].loc[dict(lon=-134.5, lat=7.5)] = 0 + + out = xs.regrid_dataset( + ds_in, + grid, + regridder_kwargs={ + "method": "bilinear", + "skipna": True, + }, + ) + assert "mask" not in out + np.testing.assert_equal( + out.tas.isel(time=0), xr.where(grid["mask"] == 0, np.nan, 0).values + ) + + @pytest.mark.parametrize( + "unmapped_to_nan, skipna", + [[True, False], [False, True], [False, False], [None, False]], + ) + def test_unmapped_to_nan(self, unmapped_to_nan, skipna): + out = xs.regrid_dataset( + self.dsin_reg, + xe.util.cf_grid_2d(-140, -130, 1, 2, 8, 1), + regridder_kwargs={ + "method": "bilinear", + "skipna": skipna, + "unmapped_to_nan": unmapped_to_nan, + }, + ) + if skipna is False and not unmapped_to_nan: + # This is the only case where unmapped NaNs will be extrapolated + np.testing.assert_equal( + out.tas.isel(time=0, lat=0), np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + ) + else: + np.testing.assert_equal( + out.tas.isel(time=0, lat=0), + np.array([0, 0, 0, 0, 0, 0, 0, 0, np.nan, np.nan]), + ) + + def test_no_regrid(self, tmp_path): + out = xs.regrid_dataset( + self.dsin_reg, + self.dsin_reg, + regridder_kwargs={ + "method": "patch", + "output_chunks": {"rlon": 5}, + "unmapped_to_nan": True, + }, + to_level="regridded2", + ) + assert out.equals(self.dsin_reg) + assert out.attrs["cat:processing_level"] == "regridded2" + + # Add a mask + ds_in = self.dsin_reg.copy() + ds_in["mask"] = xr.ones_like(ds_in.tas.isel(time=0)) + ds_in["mask"].loc[dict(lon=-142, lat=0)] = 0 + ds_grid = self.dsin_reg.copy() + ds_grid["mask"] = xr.ones_like(ds_grid.tas.isel(time=0)) + ds_grid["mask"].loc[dict(lon=-142, lat=2)] = 0 + out = xs.regrid_dataset( + ds_in, + ds_grid, + regridder_kwargs={ + "method": "patch", + "output_chunks": {"rlon": 5}, + "unmapped_to_nan": True, + }, + ) + np.testing.assert_allclose( + out.tas.isel(time=2, lon=0), np.array([np.nan, np.nan, 0, 0, 0, 0]) + ) + np.testing.assert_allclose( + out.tas.isel(time=2, lon=1), np.array([0, 0, 0, 0, 0, 0]) + ) + assert "mask" not in out + + def test_intermediate(self, tmp_path): + intermediate = { + "intermediate": { + "cf_grid_2d": { + "lon0_b": -142, + "lon1_b": -132, + "d_lon": 1, + "lat0_b": 0, + "lat1_b": 10, + "d_lat": 1, + }, + "regridder_kwargs": {"method": "bilinear", "skipna": True}, + } + } + out = xs.regrid_dataset( + self.dsin_reg, + self.dsout_rp, + weights_location=tmp_path / "weights", + intermediate_grids=intermediate, + regridder_kwargs={ + "method": "patch", + "output_chunks": {"rlon": 5}, + "unmapped_to_nan": True, + }, + ) + assert ( + tmp_path / "weights" / "C_Global_CORDEX_RegionEssai_regrid0bilinear.nc" + ).is_file() + assert ( + tmp_path / "weights" / "C_Global_CORDEX_RegionEssai_regrid1patch.nc" + ).is_file() + assert "cf_grid_2d with arguments" in out.attrs["history"] + + 
@pytest.mark.parametrize("gridmap", ["oblique_mercator", "rotated_pole"]) + def test_conservative_in(self, tmp_path, gridmap): + mult = 1 if gridmap == "rotated_pole" else 100000 + + ds_in = datablock_3d( + np.tile(np.arange(6), (1, 4, 1)) / 86400, + "pr", + "rlon" if gridmap == "rotated_pole" else "x", + 0, + "rlat" if gridmap == "rotated_pole" else "y", + 0, + 1 * mult, + 1 * mult, + "2000-01-01", + as_dataset=True, + units="kg m-2 s-1", + ) + grid = xe.util.cf_grid_2d( + ds_in.lon.min().values - 0.5, + ds_in.lon.max().values + 0.5, + 0.22, + ds_in.lat.min().values - 0.5, + ds_in.lat.max().values + 0.5, + 0.22, + ) + + out = xs.regrid_dataset( + ds_in, + grid, + regridder_kwargs={ + "method": "conservative", + "skipna": False, + }, + ) + assert out.attrs["regrid_method"] == "conservative" + assert "bounds" not in out.dims + assert "lon_bounds" not in out + assert "lat_bounds" not in out + assert gridmap not in out + + def test_conservative_out(self, tmp_path): + ds_in = datablock_3d( + np.tile(np.arange(6), (1, 4, 1)) / 86400, + "pr", + "lon", + -142.5, + "lat", + 2, + 5, 1, + "2000-01-01", + as_dataset=True, + units="kg m-2 s-1", + ) + + out = xs.regrid_dataset( + ds_in, + self.dsout_rp, + regridder_kwargs={ + "method": "conservative", + "skipna": False, + }, + ) + assert out.attrs["regrid_method"] == "conservative" + assert "bounds" not in out.dims + assert "lon_bounds" not in out + assert "lat_bounds" not in out + assert "rotated_pole" in out.coords + assert all(d in out.dims for d in ["rlon", "rlat"]) + assert all(c in out.coords for c in ["lon", "lat"]) + assert out.pr.attrs["grid_mapping"] == "rotated_pole" + + def test_conservative_multiple(self): + ds_in = datablock_3d( + np.tile(np.arange(6), (1, 4, 1)) / 86400, + "pr", + "lon", + -142.5, + "lat", + 2, + 5, 1, "2000-01-01", as_dataset=True, + units="kg m-2 s-1", ) - dsout.attrs["cat:domain"] = "Région d'essai" - dsin = datablock_3d( - np.zeros((10, 6, 6)), + out = xs.regrid_dataset( + ds_in, + self.dsout_rp, + regridder_kwargs={ + "method": "conservative", + "skipna": False, + }, + ) + assert out.attrs["regrid_method"] == "conservative" + assert "bounds" not in out.dims + assert "lon_bounds" not in out + assert "lat_bounds" not in out + assert "rotated_pole" in out.coords + assert all(d in out.dims for d in ["rlon", "rlat"]) + assert all(c in out.coords for c in ["lon", "lat"]) + assert out.pr.attrs["grid_mapping"] == "rotated_pole" + + +class TestGetGrid: + def test_none(self): + ds = datablock_3d( + np.zeros((20, 10, 10)), "tas", "lon", -142, @@ -56,22 +388,108 @@ def test_simple(self, tmp_path): "2000-01-01", as_dataset=True, ) - dsin = dsin.chunk({"lon": 3, "time": 1}) + assert xs.regrid._get_grid_mapping(ds) == "" - out = regrid_dataset( - dsin, - dsout, - tmp_path / "weights", - regridder_kwargs={ - "method": "patch", - "output_chunks": {"rlon": 5}, - "unmapped_to_nan": True, - }, + def test_rotated_pole(self): + ds = datablock_3d( + np.zeros((20, 10, 10)), + "tas", + "rlon", + -142, + "rlat", + 0, + 2, + 2, + "2000-01-01", + as_dataset=True, ) + assert xs.regrid._get_grid_mapping(ds) == "rotated_pole" - assert (tmp_path / "weights" / "weights_regrid0patch.nc").is_file() - assert out.tas.attrs["grid_mapping"] == "rotated_pole" - assert out.rotated_pole.attrs == dsout.rotated_pole.attrs - assert "patch" in out.attrs["history"] - assert out.attrs["cat:processing_level"] == "regridded" - assert out.chunks["rlon"] == (5, 5) + ds_no_coord = ds.copy() + ds_no_coord = ds_no_coord.drop_vars("rotated_pole") + assert 
xs.regrid._get_grid_mapping(ds_no_coord) == "rotated_pole" + + ds_no_var = ds.copy() + ds_no_var = ds_no_var.drop_vars("tas") + assert xs.regrid._get_grid_mapping(ds_no_var) == "rotated_pole" + + def test_error(self): + ds = datablock_3d( + np.zeros((20, 10, 10)), + "tas", + "x", + -5000, + "y", + 5000, + 100000, + 100000, + "2000-01-01", + as_dataset=True, + ) + ds["tas"].attrs["grid_mapping"] = "lambert_conformal_conic" + with pytest.warns( + UserWarning, match="There are conflicting grid_mapping attributes" + ): + assert xs.regrid._get_grid_mapping(ds) == "lambert_conformal_conic" + + +class TestMask: + ds = datablock_3d( + np.tile(np.arange(6), (1, 4, 1)), + "tas", + "lon", + -142.5, + "lat", + 2, + 5, + 1, + "2000-01-01", + as_dataset=True, + ) + ds["tas"] = ds["tas"].where(ds["tas"].lon > -142.5) + + @pytest.mark.parametrize("mask_nans", [True, False]) + def test_mask_simple(self, mask_nans): + mask = xs.regrid.create_mask( + self.ds, + variable="tas", + where_operator=">", + where_threshold=2, + mask_nans=mask_nans, + ) + assert isinstance(mask, xr.DataArray) + assert mask.attrs["where_threshold"] == "tas > 2" + assert mask.attrs["mask_NaNs"] == str(mask_nans) + assert "time" not in mask.dims + np.testing.assert_allclose( + mask, np.stack([np.array([0 if mask_nans else 1, 0, 0, 1, 1, 1])] * 4) + ) + + mask2 = xs.regrid.create_mask( + self.ds["tas"], where_operator=">", where_threshold=2, mask_nans=mask_nans + ) + assert mask2.equals(mask) + + def test_units(self): + mask = xs.regrid.create_mask( + self.ds, variable="tas", where_operator=">=", where_threshold="2 K" + ) + assert mask.attrs["where_threshold"] == "tas >= 2 K" + np.testing.assert_allclose(mask, np.stack([np.array([0, 0, 1, 1, 1, 1])] * 4)) + + mask2 = xs.regrid.create_mask( + self.ds, variable="tas", where_operator=">=", where_threshold="2 C" + ) + assert mask2.attrs["where_threshold"] == "tas >= 2 C" + np.testing.assert_allclose(mask2, np.stack([np.array([0, 0, 0, 0, 0, 0])] * 4)) + + def test_error(self): + with pytest.raises( + ValueError, + match="'where_operator' and 'where_threshold' must be used together.", + ): + xs.regrid.create_mask(self.ds, variable="tas", where_operator=">") + with pytest.raises( + ValueError, match="A variable needs to be specified when passing a Dataset." 
+ ): + xs.regrid.create_mask(self.ds) diff --git a/tests/test_scripting.py b/tests/test_scripting.py index 489afbdc..2c4eb72a 100644 --- a/tests/test_scripting.py +++ b/tests/test_scripting.py @@ -45,6 +45,8 @@ def test_save_and_update(self): TestScripting.ds, cat, file_format="nc", + # To fix hdf5 issues with h5py 3.11 on pip + save_kwargs=dict(netcdf_kwargs={"engine": "netcdf4"}), build_path_kwargs={"root": root}, ) diff --git a/tests/test_spatial.py b/tests/test_spatial.py index b7559efd..f28e656e 100644 --- a/tests/test_spatial.py +++ b/tests/test_spatial.py @@ -241,6 +241,29 @@ def test_subset_bboxshape(self, kwargs, tile_buffer, method): np.testing.assert_array_equal(out["lon"], np.arange(-63, -59)) np.testing.assert_array_equal(out["lat"], np.arange(47, 51)) + @pytest.mark.parametrize("crs", ["bad", "EPSG:3857", "EPSG:4326"]) + def test_shape_crs(self, crs): + gdf = gpd.GeoDataFrame( + {"geometry": [Polygon([(-63, 47), (-63, 50), (-60, 50), (-60, 47)])]} + ) + if crs != "bad": + gdf.crs = crs + if crs != "EPSG:4326": + with pytest.warns(UserWarning, match="Reprojecting to this CRS"): + with pytest.raises( + ValueError, match="No grid cell centroids" + ): # This is from clisops, this is not our warning + xs.spatial.subset(self.ds, "shape", shape=gdf, tile_buffer=5) + else: + # Make sure there is no warning about reprojection + with pytest.warns() as record: + xs.spatial.subset(self.ds, "shape", shape=gdf, tile_buffer=5) + assert not any("Reprojecting to this CRS" in str(w) for w in record) + + else: + with pytest.warns(UserWarning, match="does not have a CRS"): + xs.spatial.subset(self.ds, "shape", shape=gdf, tile_buffer=5) + def test_subset_sel(self): ds = datablock_3d( np.ones((3, 50, 50)), diff --git a/tests/test_utils.py b/tests/test_utils.py index 5c091481..b5fff0f8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,12 +1,67 @@ from datetime import datetime +from pathlib import Path import cftime +import dask.array import numpy as np import pandas as pd import pytest +import xarray as xr +import xclim as xc from xclim.testing.helpers import test_timeseries as timeseries import xscen as xs +from xscen.testing import datablock_3d + + +class TestLocale: + def test_update(self): + ds = timeseries( + np.tile(np.arange(1, 366), 30), + variable="tas", + start="2001-01-01", + freq="D", + as_dataset=True, + ) + indicator = xc.core.indicator.Indicator.from_dict( + data={"base": "tg_mean"}, + identifier="tg_mean", + module="atmos", + ) + with xc.set_options(metadata_locales="fr"): + out = xs.compute_indicators(ds, [("tg_mean", indicator)])["YS-JAN"] + out = xs.climatological_op(out, op="mean") + + assert ( + out["tg_mean_clim_mean"].attrs["long_name"] + == "30-year climatological average of Mean daily mean temperature." + ) + assert ( + out["tg_mean_clim_mean"].attrs["long_name_fr"] + == "Moyenne 30 ans de Moyenne de la température moyenne quotidienne." 
+ ) + + @pytest.mark.parametrize("locale", ["fr", "jp"]) + def test_add(self, locale): + # Dummy function to make gettext aware of translatable-strings + def _(s): + return s + + ds = timeseries( + np.arange(1, 366), + variable="tas", + start="2001-01-01", + freq="D", + as_dataset=True, + ) + with xc.set_options(metadata_locales=locale): + xs.utils.add_attr(ds, "some_attr", _("Ranking of measure performance")) + assert ds.attrs["some_attr"] == "Ranking of measure performance" + if locale == "fr": + assert ds.attrs["some_attr_fr"] == "Classement de performance de la mesure" + elif locale == "jp": + # Japanese translation is not available, so the original string is used + assert ds.attrs["some_attr_jp"] == "Ranking of measure performance" class TestDateParser: @@ -38,6 +93,12 @@ class TestDateParser: ), ("abc", None, "datetime", pd.Timestamp("NaT")), ("", True, "datetime", pd.Timestamp("NaT")), + ( + pd.Period("2001-07-08", "H"), + None, + "datetime", + pd.Timestamp("2001-07-08"), + ), ], ) def test_normal(self, date, end_of_period, dtype, exp): @@ -48,12 +109,106 @@ def test_normal(self, date, end_of_period, dtype, exp): assert out == exp +class TestMinCal: + @pytest.mark.parametrize( + "cals", + [ + ["360_day", "365_day"], + ["365_day", "default"], + ["noleap", "default"], + ["365_day", "noleap"], + ["365_day", "all_leap"], + ["366_day", "all_leap"], + ["366_day", "default"], + ], + ) + def test_minimum_calendar(self, cals): + out = xs.utils.minimum_calendar(cals) + if "360_day" in cals: + assert out == "360_day" + elif any(c in cals for c in ["noleap", "365_day"]): + assert out == "noleap" + elif any(c in cals for c in ["default", "standard"]): + assert out == "standard" + else: + assert out == "all_leap" + + def test_error(self): + with pytest.warns(UserWarning, match="These calendars are not recognized"): + xs.utils.minimum_calendar(["366_day", "foo"]) + + +class TestTranslateTimeChunk: + @pytest.mark.parametrize("chunk", [-1, 10]) + def test_normal(self, chunk): + out = xs.utils.translate_time_chunk({"time": chunk, "lon": 50}, "noleap", 3450) + assert out == {"time": 3450 if chunk == -1 else 10, "lon": 50} + + @pytest.mark.parametrize("calendar", ["360_day", "standard", "365_day", "366_day"]) + def test_ny(self, calendar): + ndays = int(calendar.split("_")[0]) if "day" in calendar else 365.25 + out = xs.utils.translate_time_chunk( + {"time": "4year", "lon": 50}, calendar, 3450 + ) + assert out == {"time": ndays * 4, "lon": 50} + + def test_warning(self): + with pytest.warns(UserWarning, match="The number of days"): + xs.utils.translate_time_chunk( + {"time": "3year", "lon": 50}, "standard", 3450 + ) + + def test_dict_of_dict(self): + out = xs.utils.translate_time_chunk( + {"tas": {"time": 10, "lon": 50}, "pr": {"time": -1, "lon": 50}}, + "noleap", + 3450, + ) + assert out == {"tas": {"time": 10, "lon": 50}, "pr": {"time": 3450, "lon": 50}} + + +def test_naturalsort(): + assert xs.utils.natural_sort(["r1i1p1", "r2i1p1", "r10i1p1", "r1i1p2"]) == [ + "r1i1p1", + "r1i1p2", + "r2i1p1", + "r10i1p1", + ] + + +def get_cat_attrs(): + ds = timeseries( + np.tile(np.arange(1, 2), 50), + variable="tas", + start="2000-01-01", + freq="YS-JAN", + as_dataset=True, + ) + ds.attrs = { + "foo": "bar", + "cat:type": "simulation", + "cat:variable": ("tas",), + "dog:source": "CanESM5", + } + + assert xs.utils.get_cat_attrs(ds) == {"type": "simulation", "variable": ("tas",)} + assert xs.utils.get_cat_attrs(ds, var_as_str=True) == { + "type": "simulation", + "variable": "tas", + } + assert 
xs.utils.get_cat_attrs(ds, prefix="dog:") == {"source": "CanESM5"} + assert xs.utils.get_cat_attrs(ds.attrs) == { + "type": "simulation", + "variable": ("tas",), + } + + class TestScripting: ds = timeseries( np.tile(np.arange(1, 2), 50), variable="tas", start="2000-01-01", - freq="AS-JAN", + freq="YS-JAN", as_dataset=True, ) ds.attrs = { @@ -64,10 +219,12 @@ class TestScripting: } @pytest.mark.parametrize( - "prefix, var_as_str", [["cat:", False], ["cat:", True], ["dog:", True]] + "ds, prefix, var_as_str", + [["ds", "cat:", False], ["dict", "cat:", True], ["ds", "dog:", True]], ) - def test_get_cat_attrs(self, prefix, var_as_str): - out = xs.utils.get_cat_attrs(self.ds, prefix=prefix, var_as_str=var_as_str) + def test_get_cat_attrs(self, ds, prefix, var_as_str): + data = self.ds if ds == "ds" else self.ds.attrs + out = xs.utils.get_cat_attrs(data, prefix=prefix, var_as_str=var_as_str) if var_as_str and prefix == "cat:": assert out == { @@ -83,3 +240,967 @@ def test_get_cat_attrs(self, prefix, var_as_str): } elif prefix == "dog:": assert out == {"source": "CanESM5"} + + +class TestStack: + def test_no_nan(self): + ds = datablock_3d( + np.zeros((20, 10, 10)), + "tas", + "lon", + -5, + "lat", + 80.5, + 1, + 1, + "2000-01-01", + as_dataset=True, + ) + mask = xr.where(ds.tas.isel(time=0).isnull(), False, True).drop_vars("time") + out = xs.utils.stack_drop_nans(ds, mask=mask) + assert "loc" in out.dims + assert out.sizes["loc"] == 100 + + ds_unstack = xs.utils.unstack_fill_nan(out) + assert ds_unstack.equals(ds) + + def test_nan(self, tmp_path): + data = np.zeros((20, 10, 10)) + data[:, 0, 0] = [np.nan] * 20 + ds = datablock_3d( + data, + "tas", + "lon", + -5, + "lat", + 80.5, + 1, + 1, + "2000-01-01", + as_dataset=True, + ) + + mask = xr.where(ds.tas.isel(time=0).isnull(), False, True).drop_vars("time") + ds.attrs["cat:domain"] = "RegionEssai" + out = xs.utils.stack_drop_nans( + ds, + mask=mask, + new_dim="loc1", + to_file=str(tmp_path / "subfolder" / "coords_{domain}_{shape}.nc"), + ) + assert "loc1" in out.dims + assert out.sizes["loc1"] == 99 + assert (tmp_path / "subfolder" / "coords_RegionEssai_10x10.nc").is_file() + + out_no_mask = xs.utils.stack_drop_nans(ds, mask=["lon", "lat"], new_dim="loc1") + assert out_no_mask.equals(out) + + ds_unstack = xs.utils.unstack_fill_nan( + out, + dim="loc1", + coords=str(tmp_path / "subfolder" / "coords_{domain}_{shape}.nc"), + ) + assert ds_unstack.equals(ds) + + @pytest.mark.parametrize("coords", ["file.nc", ["lon", "lat"], "dict", None]) + def test_fillnan_coords(self, tmpdir, coords): + data = np.zeros((20, 10, 10)) + data[:, 1, 0] = [np.nan] * 20 + data[:, 0, :] = [np.nan] * 10 + ds = datablock_3d( + data, + "tas", + "lon", + -5, + "lat", + 80.5, + 1, + 1, + "2000-01-01", + as_dataset=True, + ) + ds.attrs["cat:domain"] = "RegionEssai" + + mask = xr.where(ds.tas.isel(time=0).isnull(), False, True).drop_vars("time") + # Add mask as a coordinate + ds = ds.assign_coords(z=mask.astype(int)) + ds.z.attrs["foo"] = "bar" + + if coords == "dict": + coords = {"lon": ds.lon, "lat": ds.lat, "z": ds.z} + elif coords == "file.nc": + coords = str(tmpdir / "coords_{domain}_{shape}.nc") + + ds_stack = xs.utils.stack_drop_nans( + ds, mask=mask, to_file=coords if isinstance(coords, str) else None + ) + ds_unstack = xs.utils.unstack_fill_nan( + ds_stack, + coords=coords, + ) + + if isinstance(coords, list): + # Cannot fully recover the original dataset. 
+ ds_unstack["z"] = ds_unstack["z"].fillna(0) + assert ds_unstack.equals(ds.isel(lat=slice(1, None))) + elif coords is None: + # 'z' gets completely assigned as a dimension. + assert "z" in ds_unstack.dims + assert ( + ds_unstack.isel(z=0) + .drop_vars("z") + .equals(ds.isel(lat=slice(1, None)).drop_vars("z")) + ) + else: + assert ds_unstack.equals(ds) + + def test_maybe(self, tmp_path): + data = np.zeros((20, 10, 10)) + data[:, 0, 0] = [np.nan] * 20 + ds = datablock_3d( + data, + "tas", + "lon", + -5, + "lat", + 80.5, + 1, + 1, + "2000-01-01", + as_dataset=True, + ) + mask = xr.where(ds.tas.isel(time=0).isnull(), False, True).drop_vars("time") + ds.attrs["cat:domain"] = "RegionEssai" + z = xr.DataArray( + np.ones([10, 10]), + dims=["lat", "lon"], + coords={"lat": ds.lat, "lon": ds.lon}, + ) + z1d = xr.DataArray(np.ones([10]), dims=["lat"], coords={"lat": ds.lat}) + ds = ds.assign_coords(z=z, z1d=z1d) + out = xs.utils.stack_drop_nans( + ds, + mask=mask, + new_dim="loc1", + to_file=str(tmp_path / "coords_{domain}_{shape}.nc"), + ) + + maybe_unstacked = xs.utils.maybe_unstack( + out, dim="loc1", coords=str(tmp_path / "coords_{domain}_{shape}.nc") + ) + assert maybe_unstacked.equals(out) + # Call through clean_up to test the whole pipeline + maybe_unstack_dict = { + "dim": "loc1", + "coords": str(tmp_path / "coords_{domain}_{shape}.nc"), + "stack_drop_nans": True, + } + maybe_unstacked = xs.utils.clean_up(out, maybe_unstack_dict=maybe_unstack_dict) + assert maybe_unstacked.equals(ds) + maybe_unstacked = xs.utils.maybe_unstack( + out, + dim="loc1", + coords=str(tmp_path / "coords_{domain}_{shape}.nc"), + rechunk={"lon": -1, "lat": 2}, + stack_drop_nans=True, + ) + assert dict(maybe_unstacked.chunks) == { + "time": (20,), + "lat": (2, 2, 2, 2, 2), + "lon": (10,), + } + + +class TestVariablesUnits: + def test_variables_same(self): + ds = timeseries( + np.tile(np.arange(1, 13), 3), + variable="tas", + start="2001-01-01", + freq="MS", + as_dataset=True, + ) + out = xs.clean_up(ds, variables_and_units={"tas": "degK"}) + assert out.tas.attrs["units"] == "degK" + np.testing.assert_array_equal(out.tas, ds.tas) + + def test_variables_2(self): + ds = timeseries( + np.tile(np.arange(1, 13), 3), + variable="tas", + start="2001-01-01", + freq="MS", + as_dataset=True, + ) + ds["pr"] = timeseries( + np.tile(np.arange(1, 13), 3), + variable="pr", + start="2001-01-01", + freq="MS", + ) + out = xs.clean_up(ds, variables_and_units={"tas": "degK"}) + assert out.tas.attrs["units"] == "degK" + assert out.pr.attrs["units"] == "kg m-2 s-1" + + def test_variables_sametime(self): + ds = timeseries( + np.tile(np.arange(1, 13), 3), + variable="pr", + start="2001-01-01", + freq="D", + as_dataset=True, + ) + out = xs.clean_up(ds, variables_and_units={"pr": "mm/day"}) + assert out.pr.attrs["units"] == "mm d-1" + np.testing.assert_array_almost_equal(out.pr, ds.pr * 86400) + + def test_variables_amount2rate(self): + ds = timeseries( + np.tile(np.arange(1, 13), 3), + variable="pr", + start="2001-01-01", + freq="D", + units="mm", + as_dataset=True, + ) + out = xs.clean_up(ds, variables_and_units={"pr": "mm/day"}) + assert out.pr.attrs["units"] == "mm d-1" + np.testing.assert_array_almost_equal(out.pr, ds.pr) + + def test_variables_rate2amount(self): + ds = timeseries( + np.tile(np.arange(1, 13), 3), + variable="pr", + start="2001-01-01", + freq="D", + as_dataset=True, + ) + out = xs.clean_up(ds, variables_and_units={"pr": "mm"}) + assert out.pr.attrs["units"] == "mm" + np.testing.assert_array_almost_equal(out.pr, ds.pr * 86400) 
+ + def test_variables_error(self): + ds = timeseries( + np.tile(np.arange(1, 13), 3), + variable="pr", + start="2001-01-01", + freq="D", + units="mm s-2", + as_dataset=True, + ) + with pytest.raises(ValueError, match="No known transformation"): + xs.clean_up(ds, variables_and_units={"pr": "mm"}) + + +class TestCalendar: + def test_normal(self): + ds = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + + out = xs.clean_up(ds, convert_calendar_kwargs={"calendar": "noleap"}) + assert isinstance(out.time.values[0], cftime.DatetimeNoLeap) + assert len(out.time) == 365 * 4 + + def test_360(self): + ds = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + + out = xs.clean_up(ds, convert_calendar_kwargs={"calendar": "360_day"}) + assert isinstance(out.time.values[0], cftime.Datetime360Day) + assert len(out.time) == 360 * 4 + assert len(out.time.sel(time="2000-02-30")) == 1 + + def test_missing_by_var(self): + ds = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + ds["pr"] = timeseries( + np.arange(1, 365 * 4 + 2), + variable="pr", + start="2000-01-01", + freq="D", + ) + ds = xs.clean_up(ds, convert_calendar_kwargs={"calendar": "noleap"}) + missing_by_vars = {"tas": "interpolate", "pr": 9999} + + out = xs.clean_up( + ds, + convert_calendar_kwargs={"calendar": "standard"}, + missing_by_var=missing_by_vars, + ) + assert out.tas.isnull().sum() == 0 + np.testing.assert_array_equal(out.tas.sel(time="2000-02-29"), 60) + assert out.pr.isnull().sum() == 0 + np.testing.assert_array_equal(out.pr.sel(time="2000-02-29"), 9999) + + def test_missing_by_var_error(self): + ds = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + ds["pr"] = timeseries( + np.arange(1, 365 * 4 + 2), + variable="pr", + start="2000-01-01", + freq="D", + ) + missing_by_vars = {"pr": 9999} + with pytest.raises(ValueError, match="All variables must be"): + xs.clean_up( + ds, + convert_calendar_kwargs={"calendar": "standard"}, + missing_by_var=missing_by_vars, + ) + + +def test_round(): + ds = timeseries( + np.arange(1, 365 * 4 + 2) / 1234, + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + ds["pr"] = timeseries( + np.arange(1, 365 * 4 + 2) / 1234, + variable="pr", + start="2000-01-01", + freq="D", + ) + out = xs.clean_up(ds, round_var={"tas": 6, "pr": 1}) + np.testing.assert_array_equal(out.tas.isel(time=0), 0.000810) + np.testing.assert_array_equal(out.pr.isel(time=0), 0.0) + assert "Rounded 'pr' to 1 decimal" in out["pr"].attrs["history"] + + +class TestAttrs: + def test_common(self): + ds1 = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + + ds2 = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + ds2.attrs = { + "foo": "bar", + "cat:type": "simulation", + "cat:variable": ("tas",), + "cat:source": "CNRM-CM6", + "cat:mip_era": "CMIP6", + } + ds3 = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + ds3.attrs = { + "foo": "bar", + "cat:type": "simulation", + "cat:variable": ("tas",), + "cat:source": "CanESM5", + "cat:mip_era": "CMIP6", + } + + # Nothing in common between ds1 and the other datasets + ds1.attrs = {"bar": "foo"} + out = xs.clean_up(ds1, 
common_attrs_only=[ds2, ds3]) + assert out.attrs == {} + + ds1.attrs = ds2.attrs + out = xs.clean_up(ds1, common_attrs_only=[ds2, ds3]) + assert all( + k in out.attrs + for k in ["foo", "cat:type", "cat:variable", "cat:mip_era", "cat:id"] + ) + assert out.attrs["cat:id"] == "CMIP6" + + del ds1.attrs["cat:mip_era"] + out = xs.clean_up(ds1, common_attrs_only={"a": ds2, "b": ds3}) + assert all( + k in out.attrs for k in ["foo", "cat:type", "cat:variable", "cat:id"] + ) + assert out.attrs["cat:id"] == "" + + @pytest.mark.requires_netcdf + def test_common_open(self): + ds1 = xr.open_dataset( + Path(__file__).parent.parent + / "docs" + / "notebooks" + / "samples" + / "tutorial" + / "ScenarioMIP" + / "example-region" + / "NCC" + / "NorESM2-MM" + / "ssp126" + / "r1i1p1f1" + / "day" + / "ScenarioMIP_NCC_NorESM2-MM_ssp126_r1i1p1f1_gn_raw.nc" + ) + ds1.attrs["cat:id"] = "SomeID" + ds2 = ( + Path(__file__).parent.parent + / "docs" + / "notebooks" + / "samples" + / "tutorial" + / "ScenarioMIP" + / "example-region" + / "NCC" + / "NorESM2-MM" + / "ssp245" + / "r1i1p1f1" + / "day" + / "ScenarioMIP_NCC_NorESM2-MM_ssp245_r1i1p1f1_gn_raw.nc" + ) + ds3 = ( + Path(__file__).parent.parent + / "docs" + / "notebooks" + / "samples" + / "tutorial" + / "ScenarioMIP" + / "example-region" + / "NCC" + / "NorESM2-MM" + / "ssp585" + / "r1i1p1f1" + / "day" + / "ScenarioMIP_NCC_NorESM2-MM_ssp585_r1i1p1f1_gn_raw.nc" + ) + + out = xs.clean_up(ds1, common_attrs_only=[ds2, ds3]) + assert ( + out.attrs["comment"] + == "This is a test file created for the xscen tutorial. This file is not a real CMIP6 file." + ) + assert out.attrs.get("cat:id") is None + + def test_to_level(self): + ds = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + out = xs.clean_up(ds, to_level="cat") + assert out.attrs["cat:processing_level"] == "cat" + + def test_remove(self): + ds = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + ds.attrs = { + "foo": "bar", + "cat:type": "simulation", + "cat:variable": ("tas",), + "cat:source": "CNRM-CM6", + "bacat:mip_era": "CMIP6", + } + out = xs.clean_up( + ds, attrs_to_remove={"tas": ["units"], "global": [".*cat.*", "foo"]} + ) + assert "units" in ds.tas.attrs + assert "units" not in out.tas.attrs + assert out.attrs == {} + out2 = xs.clean_up(ds, attrs_to_remove={"tas": ["units"], "global": ["cat.*"]}) + assert out2.attrs == {"foo": "bar", "bacat:mip_era": "CMIP6"} + + def test_remove_except(self): + ds = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + ds.attrs = { + "foo": "bar", + "cat:type": "simulation", + "cat:variable": ("tas",), + "cat:source": "CNRM-CM6", + "bacat:mip_era": "CMIP6", + } + out = xs.clean_up( + ds, remove_all_attrs_except={"tas": ["units"], "global": [".*cat.*"]} + ) + assert out.tas.attrs == {"units": "K"} + assert out.attrs == { + "cat:type": "simulation", + "cat:variable": ("tas",), + "cat:source": "CNRM-CM6", + "bacat:mip_era": "CMIP6", + } + out2 = xs.clean_up(ds, remove_all_attrs_except={"global": ["cat.*"]}) + assert len(out2.tas.attrs) == 4 + assert out2.attrs == { + "cat:type": "simulation", + "cat:variable": ("tas",), + "cat:source": "CNRM-CM6", + } + + def test_add_attr(self): + ds = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + out = xs.clean_up( + ds, + add_attrs={"tas": {"foo": "bar"}, "global": {"foo2": 
"electric boogaloo"}}, + ) + assert out.tas.attrs["foo"] == "bar" + assert out.attrs["foo2"] == "electric boogaloo" + + @pytest.mark.parametrize( + "change_prefix", ["dog", {"cat": "dog:"}, {"cat:": "dog:", "bacat": "badog"}] + ) + def test_change_prefix(self, change_prefix): + ds = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + ds.attrs = { + "foo": "bar", + "cat:type": "simulation", + "cat:variable": ("tas",), + "cat:source": "CNRM-CM6", + "bacat:mip_era": "CMIP6", + } + out = xs.clean_up(ds, change_attr_prefix=change_prefix) + if isinstance(change_prefix, str) or len(change_prefix) == 1: + assert out.attrs == { + "foo": "bar", + "dog:type": "simulation", + "dog:variable": ("tas",), + "dog:source": "CNRM-CM6", + "bacat:mip_era": "CMIP6", + } + else: + assert out.attrs == { + "foo": "bar", + "dog:type": "simulation", + "dog:variable": ("tas",), + "dog:source": "CNRM-CM6", + "badog:mip_era": "CMIP6", + } + + +class TestPublish: + @pytest.mark.requires_netcdf + @pytest.mark.parametrize("fmt", ["md", "rst"]) + def test_normal(self, fmt): + out = xs.utils.publish_release_notes( + fmt, changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst") + ) + if fmt == "md": + assert out.startswith("# Changelog\n\n") + assert "[PR/413](https://github.com/Ouranosinc/xscen/pull/413)" in out + elif fmt == "rst": + assert out.startswith("=========\nChangelog\n=========\n\n") + assert "`PR/413 `_" in out + + def test_error(self): + with pytest.raises(FileNotFoundError): + xs.utils.publish_release_notes("md", changes="foo") + with pytest.raises(NotImplementedError): + xs.utils.publish_release_notes( + "foo", changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst") + ) + + @pytest.mark.requires_netcdf + def test_file(self, tmpdir): + xs.utils.publish_release_notes( + "md", + file=tmpdir / "foo.md", + changes=Path(__file__).parent.parent.joinpath("CHANGELOG.rst"), + ) + with Path(tmpdir).joinpath("foo.md").open(encoding="utf-8") as f: + assert f.read().startswith("# Changelog\n\n") + + +class TestUnstackDates: + @pytest.mark.parametrize( + "freq", ["MS", "2MS", "3MS", "QS-DEC", "QS", "2QS", "YS", "YS-DEC", "4YS"] + ) + def test_normal(self, freq): + ds = timeseries( + np.arange(1, 35), + variable="tas", + start="2000-01-01", + freq=freq, + as_dataset=True, + ) + out = xs.utils.unstack_dates(ds) + np.testing.assert_array_equal( + out.time, + pd.date_range( + "2000-01-01", + periods=len(np.unique(ds.time.dt.year)), + freq="YS" if freq != "4YS" else "4YS", + ), + ) + if freq == "MS": + np.testing.assert_array_equal( + out.month, + [ + "JAN", + "FEB", + "MAR", + "APR", + "MAY", + "JUN", + "JUL", + "AUG", + "SEP", + "OCT", + "NOV", + "DEC", + ], + ) + elif "M" in freq: + assert len(out.season) == 12 / int(freq[0]) + np.testing.assert_array_equal( + out.season[0], ["JF"] if freq == "2MS" else ["JFM"] + ) + elif "QS" in freq: + assert len(out.season) == 4 if freq != "2QS" else 2 + np.testing.assert_array_equal( + out.season[0], + ( + ["MAM"] + if freq == "QS-DEC" + else ["JFM"] if freq == "QS" else ["JFMAMJ"] + ), + ) + else: + assert len(out.season) == 1 + np.testing.assert_array_equal( + out.season[0], [f"{freq.replace('YS', 'annual')}"] + ) + + @pytest.mark.parametrize("freq", ["MS", "QS", "YS"]) + def test_seasons(self, freq): + ds = timeseries( + np.arange(1, 35), + variable="tas", + start="2000-01-01", + freq=freq, + as_dataset=True, + ) + seasons = { + 1: "january", + 2: "february", + 3: "march", + 4: "april", + 5: "may", + 6: 
"june", + 7: "july", + 8: "august", + 9: "september", + 10: "october", + 11: "november", + 12: "december", + } + out = xs.utils.unstack_dates(ds, seasons=seasons) + if freq == "MS": + np.testing.assert_array_equal( + out.month, + [ + "january", + "february", + "march", + "april", + "may", + "june", + "july", + "august", + "september", + "october", + "november", + "december", + ], + ) + elif freq == "QS": + np.testing.assert_array_equal( + out.season, ["january", "april", "july", "october"] + ) + elif freq == "YS": + np.testing.assert_array_equal(out.season, ["january"]) + + @pytest.mark.parametrize("freq", ["2MS", "QS-DEC", "YS-DEC"]) + def test_winter(self, freq): + ds = timeseries( + np.arange(1, 35), + variable="tas", + start="2000-12-01", + freq=freq, + as_dataset=True, + ) + out = xs.utils.unstack_dates(ds) + assert pd.Timestamp(str(out.time[0].values).split("T")[0]) == pd.Timestamp( + "2000-01-01" + ) + out = xs.utils.unstack_dates(ds, winter_starts_year=True) + assert pd.Timestamp(str(out.time[0].values).split("T")[0]) == pd.Timestamp( + "2001-01-01" + ) + + def test_coords(self): + freq = "MS" + ds = timeseries( + np.arange(1, 35), + variable="tas", + start="2000-01-01", + freq=freq, + ) + ds["horizon"] = xr.DataArray( + np.array(["2001-2009"] * len(ds.time)), + dims="time", + coords={"time": ds.time}, + ) + ds = ds.assign_coords({"horizon": ds.horizon}) + out = xs.utils.unstack_dates(ds) + assert all(k in out["horizon"].dims for k in ["time", "month"]) + + def test_dask(self): + freq = "MS" + ds = timeseries( + dask.array.from_array(np.arange(1, 35), chunks=(10,)), + variable="tas", + start="2000-01-01", + freq=freq, + ) + out = xs.utils.unstack_dates(ds) + assert isinstance(out.data, dask.array.Array) + assert "month" in out.dims + + def test_errors(self): + ds = timeseries( + np.arange(1, 365 * 4 + 2), + variable="tas", + start="2000-01-01", + freq="D", + as_dataset=True, + ) + with pytest.raises(ValueError, match="Only monthly frequencies"): + xs.utils.unstack_dates(ds) + ds = ds.where(ds.time.dt.day != 1, drop=True) + with pytest.raises( + ValueError, match="The data must have a clean time coordinate." + ): + xs.utils.unstack_dates(ds) + + ds = timeseries( + np.arange(1, 13), + variable="tas", + start="2000-01-01", + freq="7MS", + as_dataset=True, + ) + with pytest.raises( + ValueError, match="Only periods that divide the year evenly are supported." 
+ ): + xs.utils.unstack_dates(ds) + + +def test_show_version(tmpdir): + xs.utils.show_versions(file=tmpdir / "versions.txt") + with Path(tmpdir).joinpath("versions.txt").open(encoding="utf-8") as f: + out = f.read() + assert "xscen" in out + assert "xclim" in out + assert "xarray" in out + assert "numpy" in out + assert "pandas" in out + assert "dask" in out + assert "cftime" in out + assert "netCDF4" in out + + +class TestEnsureTime: + def test_xrfreq_ok(self): + ds = timeseries( + np.arange(1, 360), + variable="tas", + start="2000-01-01T12:00:00", + freq="D", + as_dataset=True, + ) + out = xs.utils.ensure_correct_time(ds, "D") + assert np.all(out.time.dt.hour == 0) + + def test_xrfreq_bad(self): + ds = timeseries( + np.arange(1, 360), + variable="tas", + start="2000-01-01T12:00:00", + freq="D", + as_dataset=True, + ) + # Add random small number of seconds to the time + ds["time"] = ds.time + (np.random.rand(len(ds.time)) * 10).astype( + "timedelta64[s]" + ) + out = xs.utils.ensure_correct_time(ds, "D") + assert np.all(out.time.dt.hour == 0) + assert np.all(out.time.dt.second == 0) + + def test_xrfreq_error(self): + ds = timeseries( + np.arange(1, 360), + variable="tas", + start="2000-01-01T12:00:00", + freq="D", + as_dataset=True, + ) + # Add random small number of seconds to the time + rng = np.random.default_rng(0) + ds["time"] = ds.time + (rng.random(len(ds.time)) * 24).astype("timedelta64[h]") + with pytest.raises( + ValueError, + match="Dataset is labelled as having a sampling frequency of D, but some periods have more than one data point.", + ): + xs.utils.ensure_correct_time(ds, "D") + ds = timeseries( + np.arange(1, 360), + variable="tas", + start="2000-01-01T12:00:00", + freq="D", + as_dataset=True, + ) + # Remove some time points + ds = ds.where(ds.time.dt.day % 2 == 0, drop=True) + with pytest.raises( + ValueError, + match="The resampling count contains NaNs or 0s. 
There might be some missing data.", + ): + xs.utils.ensure_correct_time(ds, "D") + + +class TestStandardPeriod: + @pytest.mark.parametrize( + "period", [[1981, 2010], [[1981, 2010]], ["1981", "2010"], [["1981", "2010"]]] + ) + def test_normal(self, period): + out = xs.utils.standardize_periods(period, multiple=True) + assert out == [["1981", "2010"]] + + out = xs.utils.standardize_periods(period, multiple=False) + assert out == ["1981", "2010"] + + def test_error(self): + assert xs.utils.standardize_periods(None) is None + with pytest.raises(ValueError, match="should be comprised of two elements"): + xs.utils.standardize_periods(["1981-2010"]) + with pytest.raises(ValueError, match="should be in chronological order,"): + xs.utils.standardize_periods(["2010", "1981"]) + with pytest.raises(ValueError, match="should be a single instance"): + xs.utils.standardize_periods( + [["1981", "2010"], ["1981", "2010"]], multiple=False + ) + + +def test_sort_seasons(): + seasons = pd.Index(["JJA", "DJF", "SON", "MAM"]) + out = xs.utils.season_sort_key(seasons, name="season") + np.testing.assert_array_equal(out, [6, 0, 9, 3]) + + seasons = pd.Index(["JFM", "JAS", "OND", "AMJ"]) + out = xs.utils.season_sort_key(seasons, name="season") + np.testing.assert_array_equal(out, [1, 7, 10, 4]) + + seasons = pd.Index(["FEB", "JAN", "MAR", "DEC"]) + out = xs.utils.season_sort_key(seasons, name="month") + np.testing.assert_array_equal(out, [1, 0, 2, 11]) + + # Invalid returns the original object + seasons = pd.Index(["FEB", "DEC", "foo"]) + out = xs.utils.season_sort_key(seasons, name="month") + np.testing.assert_array_equal(out, seasons) + + +def test_xrfreq_to_timedelta(): + assert xs.utils.xrfreq_to_timedelta("D") == pd.Timedelta(1, "D") + assert xs.utils.xrfreq_to_timedelta("QS-DEC") == pd.Timedelta(90, "D") + assert xs.utils.xrfreq_to_timedelta("YS") == pd.Timedelta(365, "D") + assert xs.utils.xrfreq_to_timedelta("2QS") == pd.Timedelta(180, "D") + with pytest.raises(ValueError, match="Invalid frequency"): + xs.utils.xrfreq_to_timedelta("foo") + + +def test_ensure_new_xrfreq(): + assert xs.utils.ensure_new_xrfreq("2M") == "2ME" + assert xs.utils.ensure_new_xrfreq("2Q-DEC") == "2QE-DEC" + assert xs.utils.ensure_new_xrfreq("AS-JUL") == "YS-JUL" + assert xs.utils.ensure_new_xrfreq("A-JUL") == "YE-JUL" + assert xs.utils.ensure_new_xrfreq("Y-JUL") == "YE-JUL" + assert xs.utils.ensure_new_xrfreq("A") == "YE" + assert xs.utils.ensure_new_xrfreq("Y") == "YE" + assert xs.utils.ensure_new_xrfreq("3H") == "3h" + assert xs.utils.ensure_new_xrfreq("3T") == "3min" + assert xs.utils.ensure_new_xrfreq("3S") == "3s" + assert xs.utils.ensure_new_xrfreq("3L") == "3ms" + assert xs.utils.ensure_new_xrfreq("3U") == "3us" + + # Errors + assert xs.utils.ensure_new_xrfreq(3) == 3 + assert xs.utils.ensure_new_xrfreq("foo") == "foo" + + +def test_xarray_defaults(): + kwargs = { + "chunks": {"time": 10}, + "foo": "bar", + "xr_open_kwargs": {"decode_times": False}, + "xr_combine_kwargs": {"combine_attrs": "drop"}, + } + out = xs.utils._xarray_defaults(**kwargs) + assert out == { + "chunks": {"time": 10}, + "foo": "bar", + "xarray_open_kwargs": {"decode_times": False, "chunks": {}}, + "xarray_combine_by_coords_kwargs": { + "combine_attrs": "drop", + "data_vars": "minimal", + }, + } diff --git a/tox.ini b/tox.ini index 112d8473..6599f016 100644 --- a/tox.ini +++ b/tox.ini @@ -1,41 +1,44 @@ [tox] -min_version = 4.0 +min_version = 4.18.0 envlist = lint - py{39,310,311,312} + py{310,311,312} docs-esmpy requires = - babel - pip >= 23.3.1 + pip >= 
24.2.0 setuptools >= 65.0 opts = --colored --verbose +[gh] +python = + 3.10 = py310-coveralls + 3.11 = py311-coveralls + 3.12 = py312-esmpy-coveralls + [testenv:lint] -description = Check for Code Compliance +description = Check for Code Compliance and missing french translations skip_install = True download = true -conda_channels = -conda_env = deps = - black ==24.2.0 + black[jupyter] ==24.8.0 blackdoc ==0.3.9 isort ==5.13.2 - flake8 - flake8-rst-docstrings - ruff >=0.2.0 + flake8 >=7.1.1 + flake8-rst-docstrings >=0.3.0 + ruff >=0.5.7 + numpydoc >=1.8.0 commands_pre = pip list commands = make lint + make checkfrench allowlist_externals = make [testenv:docs{,-esmpy}] description = Run Build of xscen Documentation -conda_deps = -conda_env = environment-dev.yml extras = docs commands = @@ -45,8 +48,6 @@ allowlist_externals = [testenv:doctests{,-esmpy}] description = Run documentation linters and doctests with pytest under {basepython} -conda_deps = -conda_env = environment-dev.yml extras = dev docs @@ -58,13 +59,14 @@ commands = description = Run tests with pytest under {basepython} (Anaconda distribution) setenv = COV_CORE_SOURCE = - PYTEST_ADDOPTS = --color=yes --cov=xscen --strict-markers --verbose PYTHONPATH = {toxinidir} passenv = + CI COVERALLS_* ESMFMKFILE ESMF_VERSION GITHUB_* + XCLIM_* download = true deps = coveralls: coveralls @@ -72,21 +74,13 @@ deps = upstream: -rrequirements_upstream.txt extras = dev -conda_channels = - conda-forge - defaults -conda_deps = - pytest - pytest-cov - xdoctest -conda_env = environment.yml install_command = python -m pip install --no-user {opts} {packages} commands_pre = pip list pip check commands = - make translate pytest {posargs} +; Coveralls requires access to a repo token set in .coveralls.yml in order to report stats coveralls: - coveralls allowlist_externals = make diff --git a/xscen/data/fr/LC_MESSAGES/xscen.po b/xscen/data/fr/LC_MESSAGES/xscen.po deleted file mode 100644 index e7d76e0a..00000000 --- a/xscen/data/fr/LC_MESSAGES/xscen.po +++ /dev/null @@ -1,51 +0,0 @@ -# French translations for xscen. -# Copyright (C) 2023 ORGANIZATION -# This file is distributed under the same license as the xscen project. -# Équipe Xscen , 2023. -# -msgid "" -msgstr "" -"Project-Id-Version: xscen 0.6.18b0\n" -"Report-Msgid-Bugs-To: Rondeau-Genesse.Gabriel@ouranos.ca\n" -"POT-Creation-Date: 2023-09-29 11:45-0400\n" -"PO-Revision-Date: 2023-08-15 16:48-0400\n" -"Last-Translator: Pascal Bourgault \n" -"Language: fr\n" -"Language-Team: fr \n" -"Plural-Forms: nplurals=2; plural=(n > 1);\n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.12.1\n" - -#: xscen/aggregate.py:185 -msgid "{window}-year mean of {attr}." -msgstr "Moyenne {window} ans de {attr}." - -#: xscen/aggregate.py:319 -msgid "{attr1}: {kind} delta compared to {refhoriz}." -msgstr "{attr1}: Delta {kind} comparé à {refhoriz}." 
- -#: xscen/diagnostics.py:501 -msgid "Ranking of measure performance" -msgstr "Classement de performance de la mesure" - -#: xscen/diagnostics.py:560 -msgid "Fraction of improved grid cells" -msgstr "Fraction de points de grille améliorés" - -#: xscen/io.py:650 -msgid "Variable" -msgstr "Variable" - -#: xscen/io.py:650 -msgid "Description" -msgstr "Description" - -#: xscen/io.py:650 -msgid "Units" -msgstr "Unités" - -#: xscen/io.py:654 -msgid "Content" -msgstr "Contenu" diff --git a/xscen/regrid.py b/xscen/regrid.py deleted file mode 100644 index 4ff97532..00000000 --- a/xscen/regrid.py +++ /dev/null @@ -1,400 +0,0 @@ -"""Functions to regrid datasets.""" - -import datetime -import operator -import os -import warnings -from copy import deepcopy -from typing import Optional, Union - -import cartopy.crs as ccrs -import cf_xarray as cfxr -import numpy as np -import xarray as xr - -try: - import xesmf as xe - from xesmf.frontend import Regridder -except ImportError: - xe = None - Regridder = "xesmf.Regridder" - -from .config import parse_config - -# TODO: Implement logging, warnings, etc. -# TODO: Add an option to call xesmf.util.grid_2d or xesmf.util.grid_global -# TODO: Implement support for an OBS2SIM kind of interpolation - - -__all__ = ["create_mask", "regrid_dataset"] - - -@parse_config -def regrid_dataset( # noqa: C901 - ds: xr.Dataset, - ds_grid: xr.Dataset, - weights_location: Union[str, os.PathLike], - *, - regridder_kwargs: Optional[dict] = None, - intermediate_grids: Optional[dict] = None, - to_level: str = "regridded", -) -> xr.Dataset: - """Regrid a dataset according to weights and a reference grid. - - Based on an intake_esm catalog, this function performs regridding on Zarr files. - - Parameters - ---------- - ds : xarray.Dataset - Dataset to regrid. The Dataset needs to have lat/lon coordinates. - Supports a 'mask' variable compatible with ESMF standards. - weights_location : Union[str, os.PathLike] - Path to the folder where weight file is saved. - ds_grid : xr.Dataset - Destination grid. The Dataset needs to have lat/lon coordinates. - Supports a 'mask' variable compatible with ESMF standards. - regridder_kwargs : dict, optional - Arguments to send xe.Regridder(). If it contains `skipna` or `output_chunks`, those - are passed to the regridder call directly. - intermediate_grids : dict, optional - This argument is used to do a regridding in many steps, regridding to regular - grids before regridding to the final ds_grid. - This is useful when there is a large jump in resolution between ds and ds grid. - The format is a nested dictionary shown in Notes. - If None, no intermediary grid is used, there is only a regrid from ds to ds_grid. - to_level : str - The processing level to assign to the output. - Defaults to 'regridded' - - Returns - ------- - xarray.Dataset - Regridded dataset - - Notes - ----- - intermediate_grids = - {'name_of_inter_grid_1': {'cf_grid_2d': {arguments for util.cf_grid_2d },'regridder_kwargs':{arguments for xe.Regridder}}, - 'name_of_inter_grid_2': dictionary_as_above} - - See Also - -------- - xesmf.regridder, xesmf.util.cf_grid_2d - """ - if xe is None: - raise ImportError( - "xscen's regridding functionality requires xESMF to work, please install that package." 
- ) - - regridder_kwargs = regridder_kwargs or {} - - ds_grids = [] # list of target grids - reg_arguments = [] # list of accompanying arguments for xe.Regridder() - if intermediate_grids: - for name_inter, dict_inter in intermediate_grids.items(): - reg_arguments.append(dict_inter["regridder_kwargs"]) - ds_grids.append(xe.util.cf_grid_2d(**dict_inter["cf_grid_2d"])) - - ds_grids.append(ds_grid) # add final ds_grid - reg_arguments.append(regridder_kwargs) # add final regridder_kwargs - - out = None - - # Whether regridding is required - if ds["lon"].equals(ds_grid["lon"]) & ds["lat"].equals(ds_grid["lat"]): - out = ds - if "mask" in out: - out = out.where(out.mask == 1) - out = out.drop_vars(["mask"]) - - else: - for i, (ds_grid, regridder_kwargs) in enumerate(zip(ds_grids, reg_arguments)): - # if this is not the first iteration (out != None), - # get result from last iteration (out) as input - ds = out or ds - - kwargs = deepcopy(regridder_kwargs) - # if weights_location does no exist, create it - if not os.path.exists(weights_location): - os.makedirs(weights_location) - id = ds.attrs["cat:id"] if "cat:id" in ds.attrs else "weights" - # give unique name to weights file - weights_filename = os.path.join( - weights_location, - f"{id}_regrid{i}" - f"{'_'.join(kwargs[k] for k in kwargs if isinstance(kwargs[k], str))}.nc", - ) - - # Re-use existing weight file if possible - if os.path.isfile(weights_filename) and not ( - ("reuse_weights" in kwargs) and (kwargs["reuse_weights"] is False) - ): - kwargs["weights"] = weights_filename - kwargs["reuse_weights"] = True - - # Extract args that are to be given at call time. - # output_chunks is only valid for xesmf >= 0.8, so don't add it be default to the call_kwargs - call_kwargs = {"skipna": regridder_kwargs.pop("skipna", False)} - if "output_chunks" in regridder_kwargs: - call_kwargs["output_chunks"] = regridder_kwargs.pop("output_chunks") - - regridder = _regridder( - ds_in=ds, ds_grid=ds_grid, filename=weights_filename, **regridder_kwargs - ) - - # The regridder (when fed Datasets) doesn't like if 'mask' is present. - if "mask" in ds: - ds = ds.drop_vars(["mask"]) - - out = regridder(ds, keep_attrs=True, **call_kwargs) - - # double-check that grid_mapping information is transferred - gridmap_out = any( - "grid_mapping" in ds_grid[da].attrs for da in ds_grid.data_vars - ) - if gridmap_out: - gridmap = np.unique( - [ - ds_grid[da].attrs["grid_mapping"] - for da in ds_grid.data_vars - if "grid_mapping" in ds_grid[da].attrs - and ds_grid[da].attrs["grid_mapping"] in ds_grid - ] - ) - if len(gridmap) != 1: - warnings.warn( - "Could not determine and transfer grid_mapping information." 
-                    )
-                else:
-                    # Add the grid_mapping attribute
-                    for v in out.data_vars:
-                        out[v].attrs["grid_mapping"] = gridmap[0]
-                    # Add the grid_mapping coordinate
-                    if gridmap[0] not in out:
-                        out = out.assign_coords({gridmap[0]: ds_grid[gridmap[0]]})
-                    # The regridder seems to seriously mess up the rotated dimensions
-                    for d in out.lon.dims:
-                        out[d] = ds_grid[d]
-                        if d not in out.coords:
-                            out = out.assign_coords({d: ds_grid[d]})
-            else:
-                gridmap = np.unique(
-                    [
-                        ds[da].attrs["grid_mapping"]
-                        for da in ds.data_vars
-                        if "grid_mapping" in ds[da].attrs
-                    ]
-                )
-                # Remove the original grid_mapping attribute
-                for v in out.data_vars:
-                    if "grid_mapping" in out[v].attrs:
-                        out[v].attrs.pop("grid_mapping")
-                # Remove the original grid_mapping coordinate if it is still in the output
-                out = out.drop_vars(set(gridmap).intersection(out.variables))
-
-            # History
-            kwargs_for_hist = deepcopy(regridder_kwargs)
-            kwargs_for_hist.setdefault("method", regridder.method)
-            if intermediate_grids and i < len(intermediate_grids):
-                name_inter = list(intermediate_grids.keys())[i]
-                cf_grid_2d_args = intermediate_grids[name_inter]["cf_grid_2d"]
-                new_history = (
-                    f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] "
-                    f"regridded with regridder arguments {kwargs_for_hist} to a xesmf"
-                    f" cf_grid_2d with arguments {cf_grid_2d_args} - xESMF v{xe.__version__}"
-                )
-            else:
-                new_history = (
-                    f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] "
-                    f"regridded with arguments {kwargs_for_hist} - xESMF v{xe.__version__}"
-                )
-            history = (
-                f"{new_history}\n{out.attrs['history']}"
-                if "history" in out.attrs
-                else new_history
-            )
-            out.attrs["history"] = history
-
-    out = out.drop_vars("latitude_longitude", errors="ignore")
-    # Attrs
-    out.attrs["cat:processing_level"] = to_level
-    out.attrs["cat:domain"] = (
-        ds_grid.attrs["cat:domain"] if "cat:domain" in ds_grid.attrs else None
-    )
-    return out
-
-
-@parse_config
-def create_mask(ds: Union[xr.Dataset, xr.DataArray], mask_args: dict) -> xr.DataArray:
-    """Create a 0-1 mask based on incoming arguments.
-
-    Parameters
-    ----------
-    ds : xr.Dataset or xr.DataArray
-        Dataset or DataArray to be evaluated.
-    mask_args : dict
-        Instructions to build the mask (accepted fields are listed in the Notes).
-
-    Returns
-    -------
-    xr.DataArray
-        Mask array.
-
-    Notes
-    -----
-    mask_args fields:
-        variable: str, optional
-            Variable on which to base the mask, if ds is not already a DataArray.
-        where_operator: str, optional
-            Conditional operator such as '>'.
-        where_threshold: str, optional
-            Threshold value to be used in conjunction with where_operator.
-        mask_nans: bool
-            Whether to mask grid cells that are NaN in the source variable.
- """ - # Prepare the mask for the destination grid - ops = { - "<": operator.lt, - "<=": operator.le, - "==": operator.eq, - "!=": operator.ne, - ">=": operator.ge, - ">": operator.gt, - } - - def cmp(arg1, op, arg2): - operation = ops.get(op) - return operation(arg1, arg2) - - mask_args = mask_args or {} - if isinstance(ds, xr.DataArray): - mask = ds - elif isinstance(ds, xr.Dataset) and "variable" in mask_args: - mask = ds[mask_args["variable"]] - else: - raise ValueError("Could not determine what to base the mask on.") - - if "time" in mask.dims: - mask = mask.isel(time=0) - - if "where_operator" in mask_args: - mask = xr.where( - cmp(mask, mask_args["where_operator"], mask_args["where_threshold"]), 1, 0 - ) - else: - mask = xr.ones_like(mask) - if ("mask_nans" in mask_args) & (mask_args["mask_nans"] is True): - mask = mask.where(np.isreal(mask), other=0) - - # Attributes - if "where_operator" in mask_args: - mask.attrs["where_threshold"] = ( - f"{mask_args['variable']} {mask_args['where_operator']} {mask_args['where_threshold']}" - ) - mask.attrs["mask_nans"] = f"{mask_args['mask_nans']}" - - return mask - - -def _regridder( - ds_in: xr.Dataset, - ds_grid: xr.Dataset, - filename: Union[str, os.PathLike], - *, - method: str = "bilinear", - unmapped_to_nan: Optional[bool] = True, - **kwargs, -) -> Regridder: - """Call to xesmf Regridder with a few default arguments. - - Parameters - ---------- - ds_in : xr.Dataset - Incoming grid. The Dataset needs to have lat/lon coordinates. - ds_grid : xr.Dataset - Destination grid. The Dataset needs to have lat/lon coordinates. - filename : str or os.PathLike - Path to the NetCDF file with weights information. - method : str - Interpolation method. - unmapped_to_nan : bool, optional - Arguments to send xe.Regridder(). - regridder_kwargs : dict - Arguments to send xe.Regridder(). - - Returns - ------- - xe.frontend.Regridder - Regridder object - """ - if method.startswith("conservative"): - if ( - ds_in.cf["longitude"].ndim == 2 - and "longitude" not in ds_in.cf.bounds - and "rotated_pole" in ds_in - ): - ds_in = ds_in.update(create_bounds_rotated_pole(ds_in)) - if ( - ds_grid.cf["longitude"].ndim == 2 - and "longitude" not in ds_grid.cf.bounds - and "rotated_pole" in ds_grid - ): - ds_grid = ds_grid.update(create_bounds_rotated_pole(ds_grid)) - - regridder = xe.Regridder( - ds_in=ds_in, - ds_out=ds_grid, - method=method, - unmapped_to_nan=unmapped_to_nan, - **kwargs, - ) - if ~os.path.isfile(filename): - regridder.to_netcdf(filename) - - return regridder - - -def create_bounds_rotated_pole(ds: xr.Dataset): - """Create bounds for rotated pole datasets.""" - ds = ds.cf.add_bounds(["rlat", "rlon"]) - - # In "vertices" format then expand to 2D. 
-    rlatv1D = cfxr.bounds_to_vertices(ds.rlat_bounds, "bounds")
-    rlonv1D = cfxr.bounds_to_vertices(ds.rlon_bounds, "bounds")
-    rlatv = rlatv1D.expand_dims(rlon_vertices=rlonv1D).transpose(
-        "rlon_vertices", "rlat_vertices"
-    )
-    rlonv = rlonv1D.expand_dims(rlat_vertices=rlatv1D).transpose(
-        "rlon_vertices", "rlat_vertices"
-    )
-
-    # Get cartopy's crs for the projection
-    RP = ccrs.RotatedPole(
-        pole_longitude=ds.rotated_pole.grid_north_pole_longitude,
-        pole_latitude=ds.rotated_pole.grid_north_pole_latitude,
-        central_rotated_longitude=ds.rotated_pole.north_pole_grid_longitude,
-    )
-    PC = ccrs.PlateCarree()
-
-    # Project points
-    pts = PC.transform_points(RP, rlonv.values, rlatv.values)
-    lonv = rlonv.copy(data=pts[..., 0]).rename("lon_vertices")
-    latv = rlatv.copy(data=pts[..., 1]).rename("lat_vertices")
-
-    # Back to CF bounds format. From (N+1, M+1) to (4, N, M)
-    lonb = cfxr.vertices_to_bounds(lonv, ("bounds", "rlon", "rlat")).rename(
-        "lon_bounds"
-    )
-    latb = cfxr.vertices_to_bounds(latv, ("bounds", "rlon", "rlat")).rename(
-        "lat_bounds"
-    )
-
-    # Create dataset, set coords and attrs
-    ds_bnds = xr.merge([lonb, latb]).assign(
-        lon=ds.lon, lat=ds.lat, rotated_pole=ds.rotated_pole
-    )
-    ds_bnds["rlat"] = ds.rlat
-    ds_bnds["rlon"] = ds.rlon
-    ds_bnds.lat.attrs["bounds"] = "lat_bounds"
-    ds_bnds.lon.attrs["bounds"] = "lon_bounds"
-    return ds_bnds.transpose(*ds.lon.dims, "bounds")
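
For reference, a minimal sketch of how the removed regrid_dataset is typically called, including the nested intermediate_grids dictionary described in its docstring. The file names, the weights folder and the grid bounds below are hypothetical, and the call assumes xESMF is installed and that regrid_dataset remains importable from the top-level xscen namespace.

    import xarray as xr
    import xscen as xs  # assumes regrid_dataset is still exposed at the package level

    ds = xr.open_dataset("sim.nc")          # hypothetical source dataset with lat/lon coordinates
    ds_grid = xr.open_dataset("target.nc")  # hypothetical destination grid

    out = xs.regrid_dataset(
        ds,
        ds_grid,
        weights_location="regrid_weights",  # weight files are written here and reused on later calls
        regridder_kwargs={"method": "bilinear", "skipna": True},
        # Optional: regrid in two steps, through a 1-degree regular grid, before the final grid.
        intermediate_grids={
            "regular1deg": {
                "cf_grid_2d": {
                    "lon0_b": -80.0, "lon1_b": -60.0, "d_lon": 1.0,
                    "lat0_b": 40.0, "lat1_b": 60.0, "d_lat": 1.0,
                },
                "regridder_kwargs": {"method": "bilinear"},
            }
        },
    )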
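
In the same spirit, a small sketch of the mask_args dictionary accepted by the removed create_mask; the variable name and threshold are invented for illustration. The resulting 0-1 mask can be attached to the destination grid as the ESMF-style 'mask' variable that regrid_dataset supports.

    import xarray as xr
    import xscen as xs

    ds_grid = xr.open_dataset("target.nc")  # hypothetical dataset holding a land-fraction variable

    mask = xs.create_mask(
        ds_grid,
        mask_args={
            "variable": "sftlf",    # variable the mask is built from
            "where_operator": ">",  # keep cells where sftlf > 50
            "where_threshold": 50,
            "mask_nans": True,      # cells that are NaN in sftlf are masked out
        },
    )
    ds_grid = ds_grid.assign(mask=mask)  # picked up as the destination mask by regrid_dataset / xESMF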