Merge pull request #463 from freemansw1/v160_merge_main
Merge main into v1.6.0 branch
freemansw1 authored Jan 6, 2025
2 parents 0ebee82 + 9d4f03e commit 76fdd71
Showing 55 changed files with 94,487 additions and 91,581 deletions.
22 changes: 9 additions & 13 deletions .github/workflows/check_formatting.yml
@@ -1,23 +1,19 @@
-name: check_formatting
+name: Check Python File Formatting with Black
 on: [push, pull_request]
 jobs:
   formatting_job:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Set up conda
-        uses: conda-incubator/setup-miniconda@v2
+        uses: mamba-org/setup-micromamba@v1
         with:
-          miniforge-version: latest
-          miniforge-variant: mambaforge
-          channel-priority: strict
-          channels: conda-forge
-          show-channel-urls: true
-          use-only-tar-bz2: true
-
-      - name: Install dependencies and check formatting
-        shell: bash -l {0}
+          environment-file: environment-ci.yml
+          generate-run-shell: true
+          cache-environment: true
+          cache-downloads: true
+      - name: Check formatting
+        shell: micromamba-shell {0}
         run:
-          mamba install --quiet --yes --file requirements.txt black &&
           black --version &&
           black tobac --check --diff
9 changes: 4 additions & 5 deletions .github/workflows/check_json.yml
@@ -8,13 +8,12 @@ jobs:
         shell: bash -el {0}
     steps:
       - name: check out repository code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: set up conda environment
-        uses: conda-incubator/setup-miniconda@v2
+        uses: actions/setup-python@v5
         with:
-          auto-update-conda: true
-          auto-activate-base: false
-          activate-environment: checkjson-env
+          python-version: '3.12'
+          cache: 'pip' # caching pip dependencies
       - name: Install check-jsonschema
         run: |
           pip install check-jsonschema
21 changes: 6 additions & 15 deletions .github/workflows/check_notebooks.yml
@@ -9,22 +9,13 @@ jobs:
     steps:
       - name: check out repository code
         uses: actions/checkout@v3
-      - name: set up conda environment
-        uses: conda-incubator/setup-miniconda@v2
+      - name: set up mamba environment
+        uses: mamba-org/setup-micromamba@v1
         with:
-          miniforge-version: latest
-          miniforge-variant: mambaforge
-          channel-priority: strict
-          channels: conda-forge
-          show-channel-urls: true
-          use-only-tar-bz2: true
-          auto-update-conda: true
-          auto-activate-base: false
-          activate-environment: notebook-env
-      - name: Install tobac dependencies
-        run: |
-          mamba install -c conda-forge --yes ffmpeg gcc jupyter pytables
-          mamba install -c conda-forge --yes --file example_requirements.txt
+          environment-file: environment-examples.yml
+          generate-run-shell: true
+          cache-environment: true
+          cache-downloads: true
       - name: Install tobac
         run: |
           pip install .
20 changes: 8 additions & 12 deletions .github/workflows/codecov-CI.yml
@@ -7,24 +7,20 @@ jobs:
     runs-on: ubuntu-latest
     env:
       OS: ubuntu-latest
-      PYTHON: "3.9"
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       # Similar to MetPy install-conda action
       - name: Set up conda
-        uses: conda-incubator/setup-miniconda@v2
+        uses: mamba-org/setup-micromamba@v1
         with:
-          miniforge-version: latest
-          miniforge-variant: mambaforge
-          channel-priority: strict
-          channels: conda-forge
-          show-channel-urls: true
-          use-only-tar-bz2: true
+          environment-file: environment-ci.yml
+          generate-run-shell: true
+          cache-environment: true
+          cache-downloads: true

-      - name: Install dependencies and generate report
-        shell: bash -l {0}
+      - name: Generate report
+        shell: micromamba-shell {0}
         run:
-          mamba install --quiet --yes --file requirements.txt coverage pytest-cov &&
           python -m coverage run -m pytest --cov=./ --cov-report=xml
       - name: Upload Coverage to Codecov
         uses: codecov/codecov-action@v4
4 changes: 2 additions & 2 deletions .github/workflows/matrix_ci.yml
@@ -20,7 +20,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
         os: [macos, ubuntu, windows]

     steps:
@@ -35,7 +35,7 @@ jobs:
           cache-downloads: true
           channels: conda-forge
           channel-priority: strict
-          python-version: ${{ matrix.python-version }}
+          create-args: python=${{ matrix.python-version }}

       - name: Fetch all history for all tags and branches
         run: |
35 changes: 15 additions & 20 deletions .github/workflows/pylint.yml
@@ -7,49 +7,44 @@ permissions:
   pull-requests: write

 jobs:
-  build:
+  lint-workflow:
     runs-on: ubuntu-latest
     defaults:
       run:
         shell: bash -l {0}
     steps:
       - name: Check out Git repository
-        uses: actions/checkout@v3
-
-      - name: Set up conda
-        uses: conda-incubator/setup-miniconda@v2
+        uses: actions/checkout@v4
+      - name: Set up mamba environment
+        uses: mamba-org/setup-micromamba@v1
         with:
-          miniforge-version: latest
-          miniforge-variant: mambaforge
-          channel-priority: strict
-          channels: conda-forge
-          show-channel-urls: true
-          use-only-tar-bz2: true
-
-      - name: Install tobac and pylint
-        run: |
-          mamba install --yes pylint
+          environment-file: environment-ci.yml
+          generate-run-shell: true
+          cache-environment: true
+          cache-downloads: true
+      - name: Install tobac
+        run:
           pip install .

       - name: Store the PR branch
-        run: |
+        run:
           echo "SHA=$(git rev-parse "$GITHUB_SHA")" >> $GITHUB_OUTPUT
         id: git

       - name: Checkout RC branch
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           ref: ${{ github.base_ref }}

       - name: Get pylint score of RC branch
-        run: |
+        run:
           pylint tobac --disable=C --exit-zero
         id: main_score

       - name: Checkout PR branch
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
-          ref: "${{ steps.git.outputs.SHA }}"
+          ref: ${{ steps.git.outputs.SHA }}

       - name: Get pylint score of PR branch
         run: |
42 changes: 42 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,47 @@
 ### Tobac Changelog

+_**Version 1.5.5:**_
+
+**Bug fixes**
+
+- Including of annotations import for python versions before 3.10 [#468](https://github.com/tobac-project/tobac/pull/468)
+- Fix bulk statistics calculation when provided a dask array [#474](https://github.com/tobac-project/tobac/pull/474)
+
+**Internal Enhancements**
+
+- Fix matrix testing to use the specified python versions [#468](https://github.com/tobac-project/tobac/pull/468)
+
+
+_**Version 1.5.4:**_
+
+**Enhancements for Users**
+
+- Added the ability to use the Minimum Euclidean Spanning Tree merge/split method on data with periodic boundaries [#372](https://github.com/tobac-project/tobac/pull/372)
+- Added the ability to calculate online bulk statistics during feature detection on the raw (i.e., unsmoothed) data [#449](https://github.com/tobac-project/tobac/pull/449)
+
+**Bug fixes**
+
+- Fixes to calculations of bulk statistics [#437](https://github.com/tobac-project/tobac/pull/437)
+- Fixes to handling of PBC feature points on the PBC wraparound border [#434](https://github.com/tobac-project/tobac/pull/434)
+- Fixed an error that allows non-matching features to be used in the offline bulk statistics calculation [#448](https://github.com/tobac-project/tobac/pull/448)
+- Fixed a bug that prevented using minimum distance filtering with varying vertical coordinates [#452](https://github.com/tobac-project/tobac/pull/452)
+
+**Documentation**
+
+- Add thumbnails to the new example gallery [#428](https://github.com/tobac-project/tobac/pull/428)
+- Added documentation for developers [#281](https://github.com/tobac-project/tobac/pull/281)
+- Updated documentation for the `n_min_threshold` function in feature detection [#432](https://github.com/tobac-project/tobac/pull/432)
+- Added documentation for dealing with big datasets [#408](https://github.com/tobac-project/tobac/pull/408)
+- Updated documentation to note that the *tobac* v1.5.0 paper in GMD is in its final form [#450](https://github.com/tobac-project/tobac/pull/450)
+
+**Internal Enhancements**
+
+- PBC Distance Function handling improved for tracking and other portions of the library that uses it [#386](https://github.com/tobac-project/tobac/pull/386)
+- Added tests to `tobac.utils.get_spacings` [#429](https://github.com/tobac-project/tobac/pull/429)
+- Added matrix testing for Python 3.12 [#451](https://github.com/tobac-project/tobac/pull/451)
+- Resolved issues around updating dependencies in `black` formatting checks and Zenodo JSON checks [#457](https://github.com/tobac-project/tobac/pull/457)
+
+
 _**Version 1.5.3:**_

 **Enhancements for Users**
1 change: 1 addition & 0 deletions dev_requirements.txt
@@ -13,3 +13,4 @@ pre-commit
 black
 pytest
 typing_extensions
+nbconvert
50 changes: 47 additions & 3 deletions doc/big_datasets.rst
@@ -1,11 +1,55 @@
 Handling Large Datasets
 -------------------------------------

-Often, one desires to use *tobac* to identify and track features in large datasets ("big data"). This documentation strives to suggest various methods for doing so efficiently. Current versions of *tobac* do not allow for out-of-memory computation, meaning that these strategies may need to be employed for both computational and memory reasons.
+Often, one desires to use *tobac* to identify and track features in large datasets ("big data"). This documentation strives to suggest various methods for doing so efficiently. Current versions of *tobac* do not support out-of-core (e.g., :code:`dask`) computation, meaning that these strategies may need to be employed for both computational and memory reasons.

 .. _Split Feature Detection:

 =======================
-Split Feature Detection
+Split Feature Detection and Run in Parallel
 =======================
-Current versions of threshold feature detection (see :doc:`feature_detection_overview`) are time independent, meaning that one can parallelize feature detection across all times (although not across space). *tobac* provides the :py:meth:`tobac.utils.combine_tobac_feats` function to combine a list of dataframes produced by a parallelization method (such as :code:`jug` or :code:`multiprocessing.pool`) into a single combined dataframe suitable to perform tracking with.
+Current versions of threshold feature detection (see :doc:`feature_detection_overview`) are time independent, meaning that one can easily parallelize feature detection across all times (although not across space). *tobac* provides the :py:meth:`tobac.utils.combine_feature_dataframes` function to combine a list of dataframes produced by a parallelization method (such as :code:`jug`, :code:`multiprocessing.pool`, or :code:`dask.bag`) into a single combined dataframe suitable to perform tracking with.
+
+Below is a snippet from a larger notebook demonstrating how to run feature detection in parallel ( :doc:`big_datasets_examples/notebooks/parallel_processing_tobac`):
+
+::
+
+    # build list of tracked variables using Dask.Bag
+
+    b = db.from_sequence(
+        [
+            combined_ds["data"][x : x + 1]
+            for x in range(len(combined_ds["time"]))
+        ],
+        npartitions=1,
+    )
+    out_feature_dfs = db.map(
+        lambda x: tobac.feature_detection_multithreshold(
+            x.to_iris(), 4000, **parameters_features
+        ),
+        b,
+    ).compute()
+
+    combined_dataframes = tobac.utils.general.combine_feature_dataframes(out_feature_dfs)
+
+
+.. _Split Segmentation:
+
+======================================
+Split Segmentation and Run in Parallel
+======================================
+Recall that the segmentation mask (see :doc:`segmentation_output`) is the same size as the input grid, which results in large files when handling large input datasets. The following strategies can help reduce the output size and make segmentation masks more useful for the analysis.
+
+The first strategy is to only segment on features *after tracking and quality control*. While this will not directly impact performance, waiting to run segmentation on the final set of features (after discarding, e.g., non-tracked cells) can make analysis of the output segmentation dataset easier.
+
+To enhance the speed at which segmentation runs, one can process multiple segmentation times in parallel independently, similar to feature detection. Unlike feature detection, however, there is currently no built-in *tobac* method to combine multiple segmentation times into a single file. While one can do this using typical NetCDF tools such as :code:`nccat` or with xarray utilities such as :code:`xr.concat`, you can also leave the segmentation mask output as separate files, opening them later with multiple file retrievals such as :code:`xr.open_mfdataset`.
+
+
+.. _Tracking Hanging:
+
+=====================================
+Tracking Hangs with too many Features
+=====================================
+
+When tracking on a large dataset, :code:`tobac.tracking.linking_trackpy` can hang using the default parameters. This is due to the tracking library :code:`trackpy` searching for the next timestep's feature in too large of an area. This can be solved *without impact to scientific output* by lowering the :code:`subnetwork_size` parameter in :code:`tobac.tracking.linking_trackpy`.
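The per-time parallelization pattern described in the new documentation is not tied to dask. As a rough sketch under assumed names — `detect_features_at_time` is a hypothetical stand-in for running `tobac.feature_detection_multithreshold` on a single time slice, and plain `pandas.concat` stands in for `tobac.utils.combine_feature_dataframes` — the same split-then-combine flow with `multiprocessing.pool` looks like:

```python
# Sketch: parallelize feature detection across time steps, then combine the
# per-time dataframes into one tracking-ready dataframe.
from multiprocessing.pool import ThreadPool  # a process pool fits CPU-bound detection better

import pandas as pd


def detect_features_at_time(frame_index):
    # Placeholder for per-time-slice feature detection: returns one small
    # dataframe of "features" found at this time step.
    return pd.DataFrame({"frame": [frame_index], "feature_id": [frame_index + 1]})


with ThreadPool(processes=4) as pool:
    # map() preserves input order, so the per-time dataframes come back
    # already sorted by time step.
    per_time_dfs = pool.map(detect_features_at_time, range(4))

# One combined dataframe, analogous to combine_feature_dataframes output.
combined = pd.concat(per_time_dfs, ignore_index=True)
print(len(combined))  # 4 rows, one per time step
```

Because each time step is independent, any ordered map (pool, jug, dask.bag) slots into the same place.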
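The segmentation section mentions recombining per-time mask files with `xr.concat`. A minimal sketch of that stitch step, under stated assumptions — the variable name `segmentation_mask` and the tiny 1x4x4 grids are illustrative only, not actual *tobac* output:

```python
import numpy as np
import xarray as xr

# Two single-time segmentation masks, as if written by separate parallel
# segmentation runs (in practice these would come from separate files,
# e.g. via xr.open_mfdataset).
masks = [
    xr.DataArray(
        np.zeros((1, 4, 4), dtype=np.int32),
        dims=("time", "y", "x"),
        coords={"time": [t]},
        name="segmentation_mask",
    )
    for t in range(2)
]

# Stitch the per-time masks back into one array along the time dimension.
combined_mask = xr.concat(masks, dim="time")
print(combined_mask.sizes["time"])  # 2
```

Leaving the files separate and opening them with `xr.open_mfdataset` performs this concatenation lazily at read time instead.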
