diff --git a/.cruft.json b/.cruft.json index 683dfc6a..d218e415 100644 --- a/.cruft.json +++ b/.cruft.json @@ -11,7 +11,7 @@ "project_slug": "xscen", "project_short_description": "A climate change scenario-building analysis framework, built with xclim/xarray.", "pypi_username": "RondeauG", - "version": "0.8.4-dev.6", + "version": "0.8.4-dev.10", "use_pytest": "y", "use_black": "y", "use_conda": "y", diff --git a/.github/workflows/workflow-warning.yml b/.github/workflows/workflow-warning.yml index ce4d9975..729cf014 100644 --- a/.github/workflows/workflow-warning.yml +++ b/.github/workflows/workflow-warning.yml @@ -33,7 +33,7 @@ jobs: allowed-endpoints: > api.github.com:443 - name: Find comment - uses: peter-evans/find-comment@d5fe37641ad8451bdd80312415672ba26c86575e # v3.0.0 + uses: peter-evans/find-comment@3eae4d37986fb5a8592848f6a574fdf654e61f9e # v3.1.0 id: fc with: issue-number: ${{ github.event.pull_request.number }} diff --git a/CHANGES.rst b/CHANGES.rst index 79728a71..9f18f06a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Changelog v0.9.0 (unreleased) ------------------- -Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Gabriel Rondeau-Genesse (:user:`RondeauG`), Juliette Lavoie (:user: `juliettelavoie`). +Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Gabriel Rondeau-Genesse (:user:`RondeauG`), Juliette Lavoie (:user:`juliettelavoie`), Marco Braun (:user:`vindelico`). Breaking changes ^^^^^^^^^^^^^^^^ @@ -14,6 +14,7 @@ Breaking changes Internal changes ^^^^^^^^^^^^^^^^ +* Modified ``xscen.utils.change_units`` to always adopt the unit name given in the ``variables_and_units`` dictionary when the physical units are equal but their names differ (e.g. degC <-> °C). (:pull:`373`). * Updated the `cookiecutter` template to the latest version. (:pull:`358`): * Addresses a handful of misconfigurations in the GitHub Workflows. * Added a few free `grep`-based hooks for finding unwanted artifacts in the code base. @@ -21,6 +22,7 @@ * Added more tests. (:pull:`366`, :pull:`367`, :pull:`372`). * Refactored ``xs.spatial.subset`` into smaller functions. (:pull:`367`). * An `encoding` argument was added to ``xs.config.load_config``. (:pull:`370`). +* Various small fixes to the code to address FutureWarnings. (:pull:`380`). Bug fixes ^^^^^^^^^ @@ -31,6 +33,7 @@ Bug fixes * Fixed a bug to accept `group = False` in `adjust` function. (:pull:`366`). * `creep_weights` now correctly handles the case where the grid is small, `n` is large, and `mode=wrap`. (:issue:`367`). * Fixed a bug in ``tasmin_from_dtr`` and ``tasmax_from_dtr``, when `dtr` units differed from tasmin/max. (:pull:`372`). +* Fixed a bug where the requested chunking would be ignored when saving a dataset. (:pull:`379`). v0.8.3 (2024-02-28) ------------------- diff --git a/docs/conf.py b/docs/conf.py index 3076a05c..4284498c 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -181,7 +181,6 @@ "_build", "Thumbs.db", ".DS_Store", - "notebooks/global_tas_average_obs.ipynb" ] # The name of the Pygments (syntax highlighting) style to use.
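A note on the ``change_units`` changelog entry above: when the requested unit is physically identical to the existing one but spelled differently, the data is left untouched and only the ``units`` attribute is renamed. A minimal sketch of that behaviour, using a hypothetical two-point dataset (not code from this PR)::

    import xarray as xr
    from xscen.utils import change_units

    # Hypothetical toy dataset; "degC" and "°C" are the same physical unit.
    ds = xr.Dataset(
        {"tas": xr.DataArray([20.0, 21.5], dims="time", attrs={"units": "degC"})}
    )
    out = change_units(ds, {"tas": "°C"})
    print(out["tas"].values)          # [20.  21.5] -- data unchanged
    print(out["tas"].attrs["units"])  # °C -- the requested spelling is adopted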
diff --git a/docs/notebooks/2_getting_started.ipynb b/docs/notebooks/2_getting_started.ipynb index 0f3dc6fc..0fdad2e3 100644 --- a/docs/notebooks/2_getting_started.ipynb +++ b/docs/notebooks/2_getting_started.ipynb @@ -1,8 +1,27 @@ { "cells": [ { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "id": "0", + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Remove flox spam\n", + "\n", + "import logging\n", + "\n", + "# Get the logger for the 'flox' package\n", + "logger = logging.getLogger(\"flox\")\n", + "# Set the logging level to WARNING\n", + "logger.setLevel(logging.WARNING)" + ] + }, + { + "cell_type": "markdown", + "id": "1", "metadata": {}, "source": [ "# Getting Started\n", @@ -28,7 +47,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1", + "id": "2", "metadata": { "tags": [] }, @@ -62,7 +81,7 @@ }, { "cell_type": "markdown", - "id": "2", + "id": "3", "metadata": {}, "source": [ "### Searching a subset of datasets within *DataCatalogs*\n", @@ -84,7 +103,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3", + "id": "4", "metadata": { "tags": [] }, @@ -110,7 +129,7 @@ }, { "cell_type": "markdown", - "id": "4", + "id": "5", "metadata": {}, "source": [ "The result of `search_data_catalog` is a dictionary with one entry per unique ID. Note that a unique ID can be associated to multiple *intake datasets*, as is the case here, because `intake-esm` groups catalog lines per *id - domain - processing_level - xrfeq*." @@ -119,7 +138,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5", + "id": "6", "metadata": { "tags": [] }, @@ -130,7 +149,7 @@ }, { "cell_type": "markdown", - "id": "6", + "id": "7", "metadata": {}, "source": [ "## Extracting data\n", @@ -164,7 +183,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7", + "id": "8", "metadata": { "tags": [] }, @@ -181,7 +200,7 @@ }, { "cell_type": "markdown", - "id": "8", + "id": "9", "metadata": {}, "source": [ "### Preparing arguments for *xarray*\n", @@ -200,7 +219,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "10", "metadata": { "tags": [] }, @@ -215,7 +234,7 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "11", "metadata": {}, "source": [ "### Extraction function\n", @@ -239,7 +258,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "12", "metadata": { "tags": [] }, @@ -260,7 +279,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "13", "metadata": {}, "source": [ "### Saving files to disk\n", @@ -288,7 +307,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "14", "metadata": { "tags": [] }, @@ -310,7 +329,7 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "15", "metadata": {}, "source": [ "### Simplifying the call to extract_dataset() with search_data_catalogs()\n", @@ -321,7 +340,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "16", "metadata": { "tags": [] }, @@ -335,7 +354,7 @@ { "cell_type": "code", "execution_count": null, - "id": "16", + "id": "17", "metadata": { "tags": [] }, @@ -355,7 +374,7 @@ }, { "cell_type": "markdown", - "id": "17", + "id": "18", "metadata": {}, "source": [ "Since `cat_sim` contains multiple datasets, extracting the data should be done by looping on `.items()` or `.values()`. Also, since 'CMIP6_ScenarioMIP_NCC_NorESM2-MM_ssp126_r1i1p1f1_example-region' was extracted in the previous step, `pcat.exists_in_cat` can be used to skip re-extracting." 
@@ -364,7 +383,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "19", "metadata": { "tags": [] }, @@ -394,7 +413,7 @@ { "cell_type": "code", "execution_count": null, - "id": "19", + "id": "20", "metadata": { "nbsphinx": "hidden" }, @@ -480,7 +499,7 @@ }, { "cell_type": "markdown", - "id": "20", + "id": "21", "metadata": {}, "source": [ "## Regridding data\n", @@ -502,7 +521,7 @@ { "cell_type": "code", "execution_count": null, - "id": "21", + "id": "22", "metadata": { "tags": [] }, @@ -523,7 +542,7 @@ }, { "cell_type": "markdown", - "id": "22", + "id": "23", "metadata": {}, "source": [ "### Masking grid cells\n", @@ -542,7 +561,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23", + "id": "24", "metadata": { "tags": [] }, @@ -570,7 +589,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24", + "id": "25", "metadata": { "tags": [] }, @@ -592,7 +611,7 @@ }, { "cell_type": "markdown", - "id": "25", + "id": "26", "metadata": {}, "source": [ "### Preparing arguments for xESMF.Regridder\n", @@ -646,7 +665,7 @@ { "cell_type": "code", "execution_count": null, - "id": "26", + "id": "27", "metadata": { "tags": [] }, @@ -657,7 +676,7 @@ }, { "cell_type": "markdown", - "id": "27", + "id": "28", "metadata": {}, "source": [ "### Regridding function\n", @@ -672,7 +691,7 @@ { "cell_type": "code", "execution_count": null, - "id": "28", + "id": "29", "metadata": { "tags": [] }, @@ -721,7 +740,7 @@ { "cell_type": "code", "execution_count": null, - "id": "29", + "id": "30", "metadata": { "tags": [] }, @@ -759,7 +778,7 @@ }, { "cell_type": "markdown", - "id": "30", + "id": "31", "metadata": {}, "source": [ "## Bias adjusting data\n", @@ -779,7 +798,7 @@ { "cell_type": "code", "execution_count": null, - "id": "31", + "id": "32", "metadata": { "tags": [] }, @@ -792,7 +811,7 @@ }, { "cell_type": "markdown", - "id": "32", + "id": "33", "metadata": {}, "source": [ "### Bias adjustment function\n", @@ -827,7 +846,7 @@ { "cell_type": "code", "execution_count": null, - "id": "33", + "id": "34", "metadata": { "tags": [] }, @@ -874,7 +893,7 @@ { "cell_type": "code", "execution_count": null, - "id": "34", + "id": "35", "metadata": { "tags": [] }, @@ -935,7 +954,7 @@ }, { "cell_type": "markdown", - "id": "35", + "id": "36", "metadata": {}, "source": [ "## Computing indicators\n", @@ -973,7 +992,7 @@ { "cell_type": "code", "execution_count": null, - "id": "36", + "id": "37", "metadata": { "tags": [] }, @@ -1003,7 +1022,7 @@ { "cell_type": "code", "execution_count": null, - "id": "37", + "id": "38", "metadata": { "tags": [] }, @@ -1014,7 +1033,7 @@ }, { "cell_type": "markdown", - "id": "38", + "id": "39", "metadata": {}, "source": [ "## Spatio-temporal aggregation\n", @@ -1043,7 +1062,7 @@ { "cell_type": "code", "execution_count": null, - "id": "39", + "id": "40", "metadata": { "tags": [] }, @@ -1074,7 +1093,7 @@ { "cell_type": "code", "execution_count": null, - "id": "40", + "id": "41", "metadata": { "tags": [] }, @@ -1085,7 +1104,7 @@ }, { "cell_type": "markdown", - "id": "41", + "id": "42", "metadata": {}, "source": [ "#### Horizon coordinate and time dimension\n", @@ -1103,7 +1122,7 @@ { "cell_type": "code", "execution_count": null, - "id": "42", + "id": "43", "metadata": { "tags": [] }, @@ -1116,7 +1135,7 @@ }, { "cell_type": "markdown", - "id": "43", + "id": "44", "metadata": {}, "source": [ "### Computing deltas\n", @@ -1132,7 +1151,7 @@ { "cell_type": "code", "execution_count": null, - "id": "44", + "id": "45", "metadata": { "tags": [] }, @@ -1161,7 +1180,7 @@ { 
"cell_type": "code", "execution_count": null, - "id": "45", + "id": "46", "metadata": { "tags": [] }, @@ -1173,7 +1192,7 @@ }, { "cell_type": "markdown", - "id": "46", + "id": "47", "metadata": {}, "source": [ "### Spatial mean\n", @@ -1197,7 +1216,7 @@ { "cell_type": "code", "execution_count": null, - "id": "47", + "id": "48", "metadata": { "tags": [] }, @@ -1226,7 +1245,7 @@ { "cell_type": "code", "execution_count": null, - "id": "48", + "id": "49", "metadata": { "tags": [] }, @@ -1238,7 +1257,7 @@ }, { "cell_type": "markdown", - "id": "49", + "id": "50", "metadata": {}, "source": [ "## Ensemble statistics\n", @@ -1264,7 +1283,7 @@ { "cell_type": "code", "execution_count": null, - "id": "50", + "id": "51", "metadata": { "tags": [] }, @@ -1281,7 +1300,7 @@ }, { "cell_type": "markdown", - "id": "51", + "id": "52", "metadata": {}, "source": [ "### Ensemble stats\n", @@ -1299,7 +1318,7 @@ { "cell_type": "code", "execution_count": null, - "id": "52", + "id": "53", "metadata": { "tags": [] }, @@ -1322,7 +1341,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53", + "id": "54", "metadata": { "tags": [] }, @@ -1333,7 +1352,7 @@ }, { "cell_type": "markdown", - "id": "54", + "id": "55", "metadata": {}, "source": [ "## Clean up\n", @@ -1351,7 +1370,7 @@ }, { "cell_type": "markdown", - "id": "55", + "id": "56", "metadata": {}, "source": [ "### Calendars\n", @@ -1366,7 +1385,7 @@ { "cell_type": "code", "execution_count": null, - "id": "56", + "id": "57", "metadata": {}, "outputs": [], "source": [ @@ -1376,7 +1395,7 @@ }, { "cell_type": "markdown", - "id": "57", + "id": "58", "metadata": {}, "source": [ "### Attributes\n", @@ -1398,7 +1417,7 @@ { "cell_type": "code", "execution_count": null, - "id": "58", + "id": "59", "metadata": {}, "outputs": [], "source": [ @@ -1417,7 +1436,7 @@ { "cell_type": "code", "execution_count": null, - "id": "59", + "id": "60", "metadata": {}, "outputs": [], "source": [ @@ -1440,7 +1459,7 @@ { "cell_type": "code", "execution_count": null, - "id": "60", + "id": "61", "metadata": {}, "outputs": [], "source": [ @@ -1481,7 +1500,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/docs/notebooks/3_diagnostics.ipynb b/docs/notebooks/3_diagnostics.ipynb index 03cb7e76..a9f1068f 100644 --- a/docs/notebooks/3_diagnostics.ipynb +++ b/docs/notebooks/3_diagnostics.ipynb @@ -4,6 +4,25 @@ "cell_type": "code", "execution_count": null, "id": "0", + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Remove flox spam\n", + "\n", + "import logging\n", + "\n", + "# Get the logger for the 'flox' package\n", + "logger = logging.getLogger(\"flox\")\n", + "# Set the logging level to WARNING\n", + "logger.setLevel(logging.WARNING)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", "metadata": { "tags": [] }, @@ -35,7 +54,7 @@ }, { "cell_type": "markdown", - "id": "1", + "id": "2", "metadata": {}, "source": [ "# Diagnostics\n", @@ -48,7 +67,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2", + "id": "3", "metadata": { "tags": [] }, @@ -62,7 +81,7 @@ }, { "cell_type": "markdown", - "id": "3", + "id": "4", "metadata": {}, "source": [ "## Health checks\n", @@ -93,7 +112,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4", + "id": "5", "metadata": { "tags": [] }, @@ -110,7 +129,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5", + "id": "6", "metadata": { "tags": [] }, @@ -131,7 +150,7 @@ 
{ "cell_type": "code", "execution_count": null, - "id": "6", + "id": "7", "metadata": { "tags": [] }, @@ -153,7 +172,7 @@ }, { "cell_type": "markdown", - "id": "7", + "id": "8", "metadata": {}, "source": [ "## Properties and measures\n", @@ -170,7 +189,7 @@ }, { "cell_type": "markdown", - "id": "8", + "id": "9", "metadata": {}, "source": [ "Let's start by calculating the properties on the reference dataset.\n", @@ -215,7 +234,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -226,7 +245,7 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "11", "metadata": {}, "source": [ "The properties can be given an argument `group` ('time', 'time.season' or 'time.month'). For 'time', the time collapsing will be performed over the whole period. For 'time.season'/'time.month', the time collapsing will be performed over each season/month. See `quantile_98_tas` as an example for season." @@ -235,7 +254,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -263,7 +282,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "13", "metadata": {}, "source": [ "To compute a measure as well as a property, add the `dref_for_measure` argument with the reference properties calculated above. This will mesure the difference between the reference properties and the scenario properties.\n", @@ -273,7 +292,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -313,7 +332,7 @@ { "cell_type": "code", "execution_count": null, - "id": "14", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -332,7 +351,7 @@ }, { "cell_type": "markdown", - "id": "15", + "id": "16", "metadata": {}, "source": [ "If you have different methods of bias adjustement, you might want to compare them and see for each property which method performs best (bias close to 0, ratio close to 1) with a `measures_heatmap`.\n", @@ -350,7 +369,7 @@ { "cell_type": "code", "execution_count": null, - "id": "16", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -384,7 +403,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -397,7 +416,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -424,7 +443,7 @@ }, { "cell_type": "markdown", - "id": "19", + "id": "20", "metadata": {}, "source": [ "`measure_improved` is another way to compare two datasets. It returns the fraction of the grid points that performed better in the second dataset than in the first dataset. It is useful to see which of properties are best corrected for by the bias adjustement method." 
@@ -433,7 +452,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -484,7 +503,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/docs/notebooks/4_ensembles.ipynb b/docs/notebooks/4_ensembles.ipynb index 5ec6173c..16e6e803 100644 --- a/docs/notebooks/4_ensembles.ipynb +++ b/docs/notebooks/4_ensembles.ipynb @@ -1,5 +1,23 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Remove flox spam\n", + "\n", + "import logging\n", + "\n", + "# Get the logger for the 'flox' package\n", + "logger = logging.getLogger(\"flox\")\n", + "# Set the logging level to WARNING\n", + "logger.setLevel(logging.WARNING)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -36,7 +54,9 @@ "\n", "for d in datasets:\n", " ds = open_dataset(datasets[d]).isel(lon=slice(0, 4), lat=slice(0, 4))\n", - " ds = xs.climatological_mean(ds, window=30, periods=[[1981, 2010], [2021, 2050]])\n", + " ds = xs.climatological_op(\n", + " ds, op=\"mean\", window=30, periods=[[1981, 2010], [2021, 2050]]\n", + " )\n", " datasets[d] = xs.compute_deltas(ds, reference_horizon=\"1981-2010\")\n", " datasets[d].attrs[\"cat:id\"] = d # Required by build_reduction_data\n", " datasets[d].attrs[\"cat:xrfreq\"] = \"AS-JAN\"" @@ -270,7 +290,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/docs/notebooks/5_warminglevels.ipynb b/docs/notebooks/5_warminglevels.ipynb index eaf1031e..bb6cbcbe 100644 --- a/docs/notebooks/5_warminglevels.ipynb +++ b/docs/notebooks/5_warminglevels.ipynb @@ -1,8 +1,27 @@ { "cells": [ { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "id": "0", + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "# Remove flox spam\n", + "\n", + "import logging\n", + "\n", + "# Get the logger for the 'flox' package\n", + "logger = logging.getLogger(\"flox\")\n", + "# Set the logging level to WARNING\n", + "logger.setLevel(logging.WARNING)" + ] + }, + { + "cell_type": "markdown", + "id": "1", "metadata": {}, "source": [ "# Warming levels\n", @@ -15,7 +34,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1", + "id": "2", "metadata": { "tags": [] }, @@ -101,7 +120,7 @@ }, { "cell_type": "markdown", - "id": "2", + "id": "3", "metadata": {}, "source": [ "## Find warming levels with only the model name\n", @@ -128,7 +147,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3", + "id": "4", "metadata": { "tags": [] }, @@ -165,7 +184,7 @@ }, { "cell_type": "markdown", - "id": "4", + "id": "5", "metadata": {}, "source": [ "## Find and extract data by warming levels\n", @@ -196,7 +215,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5", + "id": "6", "metadata": { "tags": [] }, @@ -219,7 +238,7 @@ }, { "cell_type": "markdown", - "id": "6", + "id": "7", "metadata": {}, "source": [ "#### Vectorized subsetting\n", @@ -234,7 +253,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -253,7 +272,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -262,7 +281,7 @@ }, { "cell_type": "markdown", - "id": "9", + "id": "10", "metadata": {}, 
"source": [ "### Method #2: Producing horizons\n", @@ -282,7 +301,7 @@ { "cell_type": "code", "execution_count": null, - "id": "10", + "id": "11", "metadata": { "tags": [] }, @@ -314,7 +333,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "12", "metadata": { "tags": [] }, @@ -325,7 +344,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "13", "metadata": {}, "source": [ "## Deltas and spatial aggregation\n", @@ -336,7 +355,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "14", "metadata": { "tags": [] }, @@ -376,7 +395,7 @@ { "cell_type": "code", "execution_count": null, - "id": "14", + "id": "15", "metadata": { "tags": [] }, @@ -387,7 +406,7 @@ }, { "cell_type": "markdown", - "id": "15", + "id": "16", "metadata": {}, "source": [ "## Ensemble statistics\n", @@ -400,7 +419,7 @@ { "cell_type": "code", "execution_count": null, - "id": "16", + "id": "17", "metadata": { "tags": [] }, @@ -412,7 +431,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "18", "metadata": { "tags": [] }, @@ -427,7 +446,7 @@ }, { "cell_type": "markdown", - "id": "18", + "id": "19", "metadata": {}, "source": [ "Next, the weights and the datasets can be passed to `xs.ensemble_stats` to calculate the ensemble statistics." @@ -436,7 +455,7 @@ { "cell_type": "code", "execution_count": null, - "id": "19", + "id": "20", "metadata": { "tags": [] }, @@ -464,7 +483,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -483,7 +502,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/docs/notebooks/6_config.ipynb b/docs/notebooks/6_config.ipynb index 0ea24a09..742466d5 100644 --- a/docs/notebooks/6_config.ipynb +++ b/docs/notebooks/6_config.ipynb @@ -277,7 +277,7 @@ "import xarray as xr\n", "\n", "# Create a dummy dataset\n", - "time = pd.date_range(\"1951-01-01\", \"2100-01-01\", freq=\"AS-JAN\")\n", + "time = pd.date_range(\"1951-01-01\", \"2100-01-01\", freq=\"YS-JAN\")\n", "da = xr.DataArray([0] * len(time), coords={\"time\": time})\n", "da.name = \"test\"\n", "ds = da.to_dataset()\n", @@ -378,7 +378,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/environment-dev.yml b/environment-dev.yml index 9ff631b4..cd20dde0 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -35,7 +35,7 @@ dependencies: - zarr # Opt - nc-time-axis >=1.3.1 - - pyarrow >=1.0.0 + - pyarrow >=10.0.1 # Dev - babel - black ==24.2.0 diff --git a/environment.yml b/environment.yml index e827ccc1..b54bd875 100644 --- a/environment.yml +++ b/environment.yml @@ -37,5 +37,5 @@ dependencies: - babel # Opt - nc-time-axis >=1.3.1 - - pyarrow >=1.0.0 + - pyarrow >=10.0.1 - pip diff --git a/pyproject.toml b/pyproject.toml index dcfd0131..8ba4a99e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ dependencies = [ "pandas >=2.2", "parse", # Used when opening catalogs. 
- "pyarrow", + "pyarrow>=10.0.1", "pyyaml", "rechunker", "scipy", @@ -127,7 +127,7 @@ target-version = [ ] [tool.bumpversion] -current_version = "0.8.4-dev.6" +current_version = "0.8.4-dev.10" commit = true commit_args = "--no-verify" tag = false diff --git a/docs/notebooks/global_tas_average_obs.ipynb b/scripts/global_tas_average_obs.ipynb similarity index 100% rename from docs/notebooks/global_tas_average_obs.ipynb rename to scripts/global_tas_average_obs.ipynb diff --git a/xscen/__init__.py b/xscen/__init__.py index 4c002371..2a8e8f86 100644 --- a/xscen/__init__.py +++ b/xscen/__init__.py @@ -53,7 +53,7 @@ __author__ = """Gabriel Rondeau-Genesse""" __email__ = "rondeau-genesse.gabriel@ouranos.ca" -__version__ = "0.8.4-dev.6" +__version__ = "0.8.4-dev.10" def warning_on_one_line( @@ -75,9 +75,3 @@ def warning_on_one_line( "Pass observed=False to retain current behavior or observed=True to adopt the future default " "and silence this warning.", ) -warnings.filterwarnings( - "ignore", - category=FutureWarning, - module="intake_esm", - message="DataFrame.applymap has been deprecated. Use DataFrame.map instead.", -) diff --git a/xscen/catutils.py b/xscen/catutils.py index d7b630e1..46fc4145 100644 --- a/xscen/catutils.py +++ b/xscen/catutils.py @@ -634,11 +634,13 @@ def parse_directory( # noqa: C901 # translate xrfreq into frequencies and vice-versa if {"xrfreq", "frequency"}.issubset(df.columns): - df["xrfreq"].fillna( - df["frequency"].apply(CV.frequency_to_xrfreq, default=pd.NA), inplace=True + df.fillna( + {"xrfreq": df["frequency"].apply(CV.frequency_to_xrfreq, default=pd.NA)}, + inplace=True, ) - df["frequency"].fillna( - df["xrfreq"].apply(CV.xrfreq_to_frequency, default=pd.NA), inplace=True + df.fillna( + {"frequency": df["xrfreq"].apply(CV.xrfreq_to_frequency, default=pd.NA)}, + inplace=True, ) # Parse dates @@ -757,7 +759,7 @@ def parse_from_ds( # noqa: C901 attrs["variable"] = tuple(sorted(variables)) elif name in ("frequency", "xrfreq") and time is not None and time.size > 3: # round to the minute to catch floating point imprecision - freq = xr.infer_freq(time.round("T")) + freq = xr.infer_freq(time.round("min")) if freq: if "xrfreq" in names: attrs["xrfreq"] = freq diff --git a/xscen/extract.py b/xscen/extract.py index f1341039..b057637d 100644 --- a/xscen/extract.py +++ b/xscen/extract.py @@ -175,7 +175,7 @@ def extract_dataset( # noqa: C901 ) out_dict = {} - for xrfreq in pd.unique([x for y in variables_and_freqs.values() for x in y]): + for xrfreq in np.unique([x for y in variables_and_freqs.values() for x in y]): ds = xr.Dataset() attrs = {} # iterate on the datasets, in reverse timedelta order @@ -814,7 +814,8 @@ def search_data_catalogs( # noqa: C901 valid_tp = [] for var, group in varcat.df.groupby( varcat.esmcat.aggregation_control.groupby_attrs - + ["variable"] + + ["variable"], + observed=True, ): valid_tp.append( subset_file_coverage( diff --git a/xscen/io.py b/xscen/io.py index 572764eb..11357cc8 100644 --- a/xscen/io.py +++ b/xscen/io.py @@ -401,6 +401,8 @@ def save_to_netcdf( for var in list(ds.data_vars.keys()): if keepbits := _get_keepbits(bitround, var, ds[var].dtype): ds = ds.assign({var: round_bits(ds[var], keepbits)}) + # Remove original_shape from encoding, since it can cause issues with some engines. 
+ ds[var].encoding.pop("original_shape", None) _coerce_attrs(ds.attrs) for var in ds.variables.values(): @@ -519,6 +521,8 @@ def _skip(var): encoding.pop(var) if keepbits := _get_keepbits(bitround, var, ds[var].dtype): ds = ds.assign({var: round_bits(ds[var], keepbits)}) + # Remove original_shape from encoding, since it can cause issues with some engines. + ds[var].encoding.pop("original_shape", None) if len(ds.data_vars) == 0: return None @@ -904,8 +908,12 @@ def rechunk_for_saving(ds: xr.Dataset, rechunk: dict): ds[rechunk_var] = ds[rechunk_var].chunk( {d: chnks for d, chnks in rechunk_dims.items() if d in ds[rechunk_var].dims} ) - ds[rechunk_var].encoding.pop("chunksizes", None) + ds[rechunk_var].encoding["chunksizes"] = tuple( + rechunk_dims[d] if d in rechunk_dims else ds[d].shape[0] + for d in ds[rechunk_var].dims + ) ds[rechunk_var].encoding.pop("chunks", None) + ds[rechunk_var].encoding.pop("preferred_chunks", None) return ds diff --git a/xscen/utils.py b/xscen/utils.py index 88c3d866..7fe938a1 100644 --- a/xscen/utils.py +++ b/xscen/utils.py @@ -172,7 +172,7 @@ def date_parser( # noqa: C901 date : str, cftime.datetime, pd.Timestamp, datetime.datetime, pd.Period Date to be converted end_of_period : bool or str - If 'Y' or 'M', the returned date will be the end of the year or month that contains the received date. + If 'YE' or 'ME', the returned date will be the end of the year or month that contains the received date. If True, the period is inferred from the date's precision, but `date` must be a string, otherwise nothing is done. out_dtype : str Choices are 'datetime', 'period' or 'str' @@ -245,12 +245,12 @@ def _parse_date(date, fmts): if isinstance(end_of_period, str) or (end_of_period is True and fmt): quasiday = (pd.Timedelta(1, "d") - pd.Timedelta(1, "s")).as_unit(date.unit) - if end_of_period == "Y" or "m" not in fmt: + if end_of_period in ["Y", "YE"] or "m" not in fmt: date = ( - pd.tseries.frequencies.to_offset("A-DEC").rollforward(date) + quasiday + pd.tseries.frequencies.to_offset("YE-DEC").rollforward(date) + quasiday ) - elif end_of_period == "M" or "d" not in fmt: - date = pd.tseries.frequencies.to_offset("M").rollforward(date) + quasiday + elif end_of_period in ["M", "ME"] or "d" not in fmt: + date = pd.tseries.frequencies.to_offset("ME").rollforward(date) + quasiday # TODO: Implement subdaily ? if out_dtype == "str": @@ -718,6 +718,9 @@ def change_units(ds: xr.Dataset, variables_and_units: dict) -> xr.Dataset: raise NotImplementedError( f"No known transformation between {ds[v].units} and {variables_and_units[v]} (temporal dimensionality mismatch)." ) + elif (v in ds) and (ds[v].units != variables_and_units[v]): + # update unit name if physical units are equal but not their name (ex. degC vs °C) + ds[v] = ds[v].assign_attrs(units=variables_and_units[v]) return ds
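On the ``4_ensembles.ipynb`` hunk: the notebook now calls the generic ``climatological_op`` with ``op="mean"`` instead of the former ``climatological_mean``. A hedged sketch of the migrated call, with a hypothetical annual series standing in for the notebook's ``ds``::

    import numpy as np
    import pandas as pd
    import xarray as xr
    import xscen as xs

    # Hypothetical annual dataset (the notebook uses real tutorial data).
    time = pd.date_range("1951-01-01", "2100-01-01", freq="YS-JAN")
    ds = xr.DataArray(
        np.linspace(0.0, 3.0, time.size),
        coords={"time": time},
        name="tas",
        attrs={"units": "K"},
    ).to_dataset()

    # Former call: xs.climatological_mean(ds, window=30, periods=...)
    ds_clim = xs.climatological_op(
        ds, op="mean", window=30, periods=[[1981, 2010], [2021, 2050]]
    )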
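The ``6_config.ipynb`` and ``catutils.py`` hunks track the pandas 2.2 offset-alias renames: year-start ``AS-JAN`` becomes ``YS-JAN``, and the minute alias ``"T"`` becomes ``"min"``. For instance::

    import pandas as pd

    # "AS-JAN" now emits a FutureWarning; "YS-JAN" is the replacement alias.
    time = pd.date_range("1951-01-01", "2100-01-01", freq="YS-JAN")
    print(time[:2])  # DatetimeIndex(['1951-01-01', '1952-01-01'], ..., freq='YS-JAN')

    # Likewise for rounding timestamps to the minute:
    print(pd.Timestamp("2024-05-10 12:34:56").round("min"))  # 2024-05-10 12:35:00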
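The ``catutils.parse_directory`` hunk replaces chained ``Series.fillna(..., inplace=True)`` calls, which raise a FutureWarning under recent pandas because they may act on a copy, with a single column-keyed dict passed to ``DataFrame.fillna``. A small sketch on hypothetical frequency columns::

    import pandas as pd

    df = pd.DataFrame({"xrfreq": ["D", None], "frequency": [None, "mon"]})

    # Before (FutureWarning): df["xrfreq"].fillna(..., inplace=True)
    # After, as in the diff: fill the column through the parent DataFrame.
    df.fillna({"xrfreq": df["frequency"].map({"mon": "MS"})}, inplace=True)
    print(df["xrfreq"].tolist())  # ['D', 'MS']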
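Similarly, the ``observed=True`` added to the ``groupby`` call in ``search_data_catalogs`` opts into the future pandas default for categorical keys, grouping only on categories that actually occur::

    import pandas as pd

    df = pd.DataFrame(
        {
            "variable": pd.Categorical(["tas", "tas", "pr"], categories=["tas", "pr", "huss"]),
            "n": [1, 2, 3],
        }
    )
    # Without observed=True, the unused "huss" category would produce an empty
    # group (and a FutureWarning about the changing default).
    print(df.groupby("variable", observed=True)["n"].sum())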
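Finally, the ``date_parser`` hunk follows the same alias migration for period-end offsets: ``A-DEC`` becomes ``YE-DEC`` and ``M`` becomes ``ME``. The rollforward logic it relies on, sketched::

    import pandas as pd

    date = pd.Timestamp("2024-05-10")
    # Roll forward to the end of the enclosing year / month.
    print(pd.tseries.frequencies.to_offset("YE-DEC").rollforward(date))  # 2024-12-31
    print(pd.tseries.frequencies.to_offset("ME").rollforward(date))      # 2024-05-31

``date_parser`` then adds the "quasiday" (one day minus one second), so the returned end-of-period date lands at 23:59:59.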