Merge branch 'main' into develop

nasa-nccs-hpda · May 14, 2024 · caca9c8 · caca9c8
2 parents a71af56 + 4e822b2
commit caca9c8
Show file tree

Hide file tree

Showing 21 changed files with 795 additions and 29 deletions.
diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml
@@ -0,0 +1,61 @@
+name: Publish Docs  # build the Sphinx docs and push them to the gh-pages branch
+
+on:
+  push:
+    branches: [ main ]  # pushes to main refresh the "latest" docs
+  release:
+    types: [ created ]  # releases publish a copy under the release tag name
+
+concurrency:  # one run per workflow + ref; a newer run cancels the one in flight
+  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  docs:
+    name: Publish Docs
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          persist-credentials: false  # the deploy steps pass their own token
+
+      - name: Setup Conda
+        uses: s-weigand/setup-conda@v1
+        with:
+          python-version: "3.9"  # quoted so YAML keeps it a string, not a float
+          conda-channels: conda-forge
+
+      - name: Install and Build
+        shell: bash
+        run: |
+          conda config --prepend channels conda-forge
+          conda config --set channel_priority strict
+          conda create --yes -n docs python=3.9 rasterio xarray scipy pyproj pandoc sphinx sphinx-autodoc-typehints jupyter_sphinx sphinx_rtd_theme sphinx-click nbsphinx myst-nb
+          source activate docs
+          python -m pip install -e ".[doc]"
+          sphinx-build -b html docs/ docs/_build/
+          #cd docs/
+          #make html
+
+      - name: Deploy release docs 🚀
+        uses: JamesIves/github-pages-deploy-action@v4
+        if: ${{ github.event_name == 'release' }}
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}  # NOTE(review): needs contents: write to push gh-pages — confirm repo settings
+          branch: gh-pages
+          folder: docs/_build/
+          #folder: docs/_build/html
+          clean: false  # do not delete files already on the deployment branch
+          target-folder: ${{ github.ref_name }}  # short tag name; github.ref would be refs/tags/<tag>
+
+      - name: Deploy latest docs 🚀
+        uses: JamesIves/github-pages-deploy-action@v4
+        if: ${{ github.event_name == 'push' }}
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}  # NOTE(review): needs contents: write to push gh-pages — confirm repo settings
+          branch: gh-pages
+          folder: docs/_build/
+          #folder: docs/_build/html
+          clean: false  # do not delete files already on the deployment branch
+          target-folder: latest  # matches the "latest" docs URL linked from the README
diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml
@@ -1,31 +1,18 @@
-name: Publish package
-
+name: Publish to PyPI.org
 on:
+  workflow_dispatch:  # also allow manual runs
   release:
     types: [published]
-
 jobs:
-  pypi-publish:
+  pypi:
     runs-on: ubuntu-latest
-
     steps:
-    - uses: actions/checkout@v2
-      with:
-        fetch-depth: 0
-
-    - name: Set up Python
-      uses: actions/setup-python@v2
-      with:
-        python-version: '3.x'
-
-    - name: Install build package
-      run: |
-        python -m pip install --upgrade pip
-        python -m pip install build --user
-    - name: Build distribution
-      run: |
-        python -m build --sdist --wheel --outdir dist/ .
-    - name: Publish to PyPI
-      uses: pypa/gh-action-pypi-publish@master
-      with:
-        password: ${{ secrets.PYPI_API_TOKEN }}
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0  # full history and tags (presumably for VCS-derived versioning — confirm)
+      - run: python3 -m pip install --upgrade build && python3 -m build
+      - name: Publish package
+        uses: pypa/gh-action-pypi-publish@release/v1  # NOTE(review): relies on the runner's preinstalled python3 for the build step above
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}  # PyPI API token stored as a repository secret
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -0,0 +1,10 @@
+version: 2  # Read the Docs configuration file format version
+
+build:
+  os: "ubuntu-20.04"
+  tools:
+    python: "3.8"
+
+sphinx:
+  configuration: docs/conf.py  # explicit path to the Sphinx config added in this change
+  fail_on_warning: true  # treat Sphinx warnings as build failures
diff --git a/README.md b/README.md
@@ -9,6 +9,10 @@ Python package for lots of Pytorch tools.
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![Coverage Status](https://coveralls.io/repos/github/nasa-nccs-hpda/pytorch-caney/badge.svg?branch=main)](https://coveralls.io/github/nasa-nccs-hpda/pytorch-caney?branch=main)
 
+## Documentation
+
+- Latest: https://nasa-nccs-hpda.github.io/pytorch-caney/latest
+
 ## Objectives
 
 - Library to process remote sensing imagery using GPU and CPU parallelization.
@@ -56,7 +60,21 @@ Please see our [guide for contributing to pytorch-caney](CONTRIBUTING.md).
 | name | bands | resolution | #chips |
 | :---: | :---: | :---: | :---: |
 | MODIS-Small | 7 | 128x128 | 1,994,131 |
+
+## MODIS Surface Reflectance (MOD09GA) Band Details
+
+| Band Name      | Bandwidth (µm) |
+| :------------: | :-----------: |
+| sur_refl_b01_1 | 0.620 - 0.670 |
+| sur_refl_b02_1 | 0.841 - 0.876 |
+| sur_refl_b03_1 | 0.459 - 0.479 |
+| sur_refl_b04_1 | 0.545 - 0.565 |
+| sur_refl_b05_1 | 1.230 - 1.250 |
+| sur_refl_b06_1 | 1.628 - 1.652 |
+| sur_refl_b07_1 | 2.105 - 2.155 |
+
 ## Pre-training with Masked Image Modeling
+
 To pre-train the swinv2 base model with masked image modeling pre-training, run:
 ```bash
 torchrun --nproc_per_node <NGPUS> pytorch-caney/pytorch_caney/pipelines/pretraining/mim.py --cfg <config-file> --dataset <dataset-name> --data-paths <path-to-data-subfolder-1> --batch-size <batch-size> --output <output-dir> --enable-amp

diff --git a/README.rst b/README.rst
@@ -0,0 +1,164 @@
+================
+pytorch-caney
+================
+
+Python package providing PyTorch tools for geospatial science problems.
+
+.. image:: https://zenodo.org/badge/472450059.svg
+      :target: https://zenodo.org/badge/latestdoi/472450059
+
+Objectives
+------------
+
+- Library to process remote sensing imagery using GPU and CPU parallelization.
+- Machine Learning and Deep Learning image classification and regression.
+- Agnostic array and vector-like data structures.
+- User interface environments via Notebooks for easy to use AI/ML projects.
+- Example notebooks for quick AI/ML start with your own data.
+
+Installation
+----------------
+
+The following library is intended to be used to accelerate the development of data science products
+for remote sensing satellite imagery, or any other applications. pytorch-caney can be installed
+by itself, but instructions for installing the full environments are listed under the requirements
+directory so projects, examples, and notebooks can be run.
+
+Note: PIP installations do not include CUDA libraries for GPU support. Make sure NVIDIA libraries
+are installed locally in the system if not using conda/mamba.
+
+.. code-block:: bash
+
+    module load singularity # if a module needs to be loaded
+    singularity build --sandbox pytorch-caney-container docker://nasanccs/pytorch-caney:latest
+
+
+Why Caney?
+---------------
+
+"Caney" means longhouse in Taíno.
+
+Contributors
+-------------
+
+- Jordan Alexis Caraballo-Vega, [email protected]
+- Caleb Spradlin, [email protected]
+- Jian Li, [email protected]
+
+Contributing
+-------------
+
+Please see our `guide for contributing to pytorch-caney <CONTRIBUTING.md>`_.
+
+SatVision
+------------
+
++---------------+--------------+------------+------------+
+| Name          | Pretrain     | Resolution | Parameters |
++===============+==============+============+============+
+| SatVision-B   | MODIS-1.9-M  | 192x192    | 84.5M      |
++---------------+--------------+------------+------------+
+
+SatVision Datasets
+-----------------------
+
++---------------+-----------+------------+-------------+
+| Name          | Bands     | Resolution | Image Chips |
++===============+===========+============+=============+
+| MODIS-Small   | 7         | 128x128    | 1,994,131   |
++---------------+-----------+------------+-------------+
+
+MODIS Surface Reflectance (MOD09GA) Band Details
+------------------------------------------------------
+
++-----------------+---------------+
+| Band Name       | Bandwidth, µm |
++=================+===============+
+| sur_refl_b01_1  | 0.620 - 0.670 |
++-----------------+---------------+
+| sur_refl_b02_1  | 0.841 - 0.876 |
++-----------------+---------------+
+| sur_refl_b03_1  | 0.459 - 0.479 |
++-----------------+---------------+
+| sur_refl_b04_1  | 0.545 - 0.565 |
++-----------------+---------------+
+| sur_refl_b05_1  | 1.230 - 1.250 |
++-----------------+---------------+
+| sur_refl_b06_1  | 1.628 - 1.652 |
++-----------------+---------------+
+| sur_refl_b07_1  | 2.105 - 2.155 |
++-----------------+---------------+
+
+Pre-training with Masked Image Modeling
+-----------------------------------------
+
+To pre-train the swinv2 base model with masked image modeling pre-training, run:
+
+.. code-block:: bash
+
+    torchrun --nproc_per_node <NGPUS> pytorch-caney/pytorch_caney/pipelines/pretraining/mim.py --cfg <config-file> --dataset <dataset-name> --data-paths <path-to-data-subfolder-1> --batch-size <batch-size> --output <output-dir> --enable-amp
+
+For example, to run on a compute node with 4 GPUs and a batch size of 128 on the MODIS SatVision pre-training dataset with a base swinv2 model, run:
+
+.. code-block:: bash
+
+    singularity shell --nv -B <mounts> /path/to/container/pytorch-caney-container
+    Singularity> export PYTHONPATH=$PWD:$PWD/pytorch-caney
+    Singularity> torchrun --nproc_per_node 4 pytorch-caney/pytorch_caney/pipelines/pretraining/mim.py --cfg pytorch-caney/examples/satvision/mim_pretrain_swinv2_satvision_base_192_window12_800ep.yaml --dataset MODIS --data-paths /explore/nobackup/projects/ilab/data/satvision/pretraining/training_* --batch-size 128 --output . --enable-amp
+
+
+The following example script runs the exact configuration used to pre-train the SatVision-base model with masked image modeling (MiM) on the MODIS pre-training dataset.
+
+.. code-block:: bash
+
+    singularity shell --nv -B <mounts> /path/to/container/pytorch-caney-container
+    Singularity> cd pytorch-caney/examples/satvision
+    Singularity> ./run_satvision_pretrain.sh
+
+
+Fine-tuning Satvision-base
+-----------------------------
+
+To fine-tune the satvision-base pre-trained model, run:
+
+.. code-block:: bash
+
+    torchrun --nproc_per_node <NGPUS> pytorch-caney/pytorch_caney/pipelines/finetuning/finetune.py --cfg <config-file> --pretrained <path-to-pretrained> --dataset <dataset-name> --data-paths <path-to-data-subfolder-1> --batch-size <batch-size> --output <output-dir> --enable-amp
+
+See example config files pytorch-caney/examples/satvision/finetune_satvision_base_*.yaml to see how to structure your config file for fine-tuning.
+
+
+Testing
+------------
+
+To run linting and the unit tests, use the following commands. They execute both in a temporary venv environment that is used only for testing.
+
+.. code-block:: bash
+
+    git clone git@github.com:nasa-nccs-hpda/pytorch-caney.git
+    cd pytorch-caney; bash test.sh
+
+
+Alternatively, run the unit tests directly inside the container or an Anaconda environment:
+
+.. code-block:: bash
+
+    git clone git@github.com:nasa-nccs-hpda/pytorch-caney.git
+    singularity build --sandbox pytorch-caney-container docker://nasanccs/pytorch-caney:latest
+    singularity shell --nv -B <mounts> /path/to/container/pytorch-caney-container
+    cd pytorch-caney; python -m unittest discover pytorch_caney/tests
+
+.. code-block:: bash
+
+    git clone git@github.com:nasa-nccs-hpda/pytorch-caney.git
+    cd pytorch-caney; conda env create -f requirements/environment_gpu.yml;
+    conda activate pytorch-caney
+    python -m unittest discover pytorch_caney/tests
+
+
+References
+------------
+
+- `Pytorch Lightning <https://github.com/Lightning-AI/lightning>`_ 
+- `Swin Transformer <https://github.com/microsoft/Swin-Transformer>`_ 
+- `SimMIM <https://github.com/microsoft/SimMIM>`_ 
diff --git a/docs/Makefile b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+# Usage: "make <target>" (e.g. "make html") forwards <target> to sphinx-build -M.
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/conf.py b/docs/conf.py
@@ -0,0 +1,52 @@
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath('..'))  # parent dir, for the import below
+
+import pytorch_caney  # noqa: E402
+
+project = 'pytorch-caney'
+copyright = '2023, Jordan A. Caraballo-Vega'
+author = 'Jordan A. Caraballo-Vega'
+
+extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx_autodoc_typehints',
+    'jupyter_sphinx.execute',
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.napoleon",
+    "sphinx_click.ext",
+    "sphinx.ext.githubpages",
+    "nbsphinx",
+]
+
+intersphinx_mapping = {  # external inventories used for cross-references
+    "pyproj": ("https://pyproj4.github.io/pyproj/stable/", None),
+    "rasterio": ("https://rasterio.readthedocs.io/en/stable/", None),
+    "xarray": ("https://docs.xarray.dev/en/stable/", None),
+}
+
+templates_path = ['_templates']
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+master_doc = "index"
+
+version = release = pytorch_caney.__version__  # single-sourced from the package
+
+pygments_style = "sphinx"
+
+todo_include_todos = False
+
+html_theme = 'sphinx_rtd_theme'
+html_logo = 'static/DSG_LOGO_REDESIGN.png'
+
+myst_enable_extensions = [  # NOTE(review): unused unless a MyST ext is loaded
+    "amsmath",
+    "colon_fence",
+    "deflist",
+    "dollarmath",
+    "html_image",
+]
+
+myst_url_schemes = ("http", "https", "mailto")
diff --git a/docs/examples.rst b/docs/examples.rst
@@ -0,0 +1,3 @@
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents: