diff --git a/science/README.md b/science/README.md index 2946ef8..3c4ab0d 100644 --- a/science/README.md +++ b/science/README.md @@ -10,20 +10,13 @@ Take a look at the [Kedro documentation](https://docs.kedro.org) to get started. The project contains one pipeline for now: `globe` -### `global` +### `lowvshigh` -Pipeline to split nextgems global datasets (low and high resolution) into a set of tiffs (one per timestep) to use in blender to render a rotating globe. +Pipeline to generate the comparison between low and high resolution simulations. Currently it has: -### +- splits nextgems global datasets into a set of tiffs (one per timestep) to use in blender to render a rotating globe. +- video generation pipeline for the regions defined in `conf/parameters.yml` -## Rules and guidelines - -In order to get the best out of the template: - -* Don't remove any lines from the `.gitignore` file we provide -* Make sure your results can be reproduced by following a [data engineering convention](https://docs.kedro.org/en/stable/faq/faq.html#what-is-data-engineering-convention) -* Don't commit data to your repository -* Don't commit any credentials or your local configuration to your repository. Keep all your credentials and local configuration in `conf/local/` ## How to install dependencies @@ -42,51 +35,36 @@ You can run your Kedro project with: ``` kedro run ``` - -## How to test your Kedro project - -Have a look at the files `src/tests/test_run.py` and `src/tests/pipelines/data_science/test_pipeline.py` for instructions on how to write your tests. Run the tests as follows: +I recommend using the `ParallelRunner` to run the nodes in parallel ``` -pytest +kedro run --runner=ParallelRunner ``` -To configure the coverage threshold, look at the `.coveragerc` file. - -## Project dependencies - -To see and update the dependency requirements for your project use `requirements.txt`. Install the project requirements with `pip install -r requirements.txt`. 
+### Run a subset of the pipeline -[Further information about project dependencies](https://docs.kedro.org/en/stable/kedro_project_setup/dependencies.html#project-specific-dependencies) - -## How to work with Kedro and notebooks - -> Note: Using `kedro jupyter` or `kedro ipython` to run your notebook provides these variables in scope: `catalog`, `context`, `pipelines` and `session`. -> -> Jupyter, JupyterLab, and IPython are already included in the project requirements by default, so once you have run `pip install -r requirements.txt` you will not need to take any extra steps before you use them. - -### Jupyter +Kedro allows running subsets of the pipeline by selecting only nodes, pipelines or tags. Check the tags in the pipeline code or in kedro viz. +For example, to run only the detailed video pipelines, use ``` -kedro jupyter notebook +kedro run --runner=ParallelRunner --tags zoomin ``` -You can also start JupyterLab: -``` -kedro jupyter lab -``` +## Kedro viz -### IPython -And if you want to run an IPython session: +Visualize the pipeline with ``` -kedro ipython +kedro viz ``` -### How to ignore notebook output cells in `git` -To automatically strip out all output cell contents before committing to `git`, you can use tools like [`nbstripout`](https://github.com/kynan/nbstripout). For example, you can add a hook in `.git/config` with `nbstripout --install`. This will run `nbstripout` before anything is committed to `git`. -> *Note:* Your output cells will be retained locally. +## Rules and guidelines -[Further information about using notebooks for experiments within Kedro projects](https://docs.kedro.org/en/develop/notebooks_and_ipython/kedro_and_notebooks.html). 
\ No newline at end of file +In order to get the best out of the template: + +* Don't remove any lines from the `.gitignore` file we provide +* Make sure your results can be reproduced by following a [data engineering convention](https://docs.kedro.org/en/stable/faq/faq.html#what-is-data-engineering-convention) +* Don't commit data to your repository +* Don't commit any credentials or your local configuration to your repository. Keep all your credentials and local configuration in `conf/local/` \ No newline at end of file diff --git a/science/data/02_intermediate/amazonia-10-parts/.gitkeep b/science/data/02_intermediate/amazonia-10-parts/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/science/data/02_intermediate/amazonia-100-parts/.gitkeep b/science/data/02_intermediate/amazonia-100-parts/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/science/data/02_intermediate/hurricane-10-parts/.gitkeep b/science/data/02_intermediate/hurricane-10-parts/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/science/data/02_intermediate/hurricane-100-parts/.gitkeep b/science/data/02_intermediate/hurricane-100-parts/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/science/data/03_primary/ws-10-parts/.gitkeep b/science/data/03_primary/ws-10-parts/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/science/data/03_primary/ws-100-parts/.gitkeep b/science/data/03_primary/ws-100-parts/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/science/requirements.in b/science/requirements.in new file mode 100644 index 0000000..e1ce58d --- /dev/null +++ b/science/requirements.in @@ -0,0 +1,18 @@ +ipython>=8.10 +jupyterlab>=3.0 +kedro~=0.19.6 +kedro-datasets>=3.0; python_version >= "3.9" +kedro-datasets>=1.0; python_version < "3.9" +kedro-datasets[netcdf, rioxarray] +kedro-telemetry>=0.3.1 +kedro-viz>=6.7.0 +notebook +pytest~=7.2 +pytest-cov~=3.0 +pytest-mock>=1.7.1, <2.0 +ruff~=0.1.8 +matplotlib 
+cartopy +scikit-image +pillow +opencv-python \ No newline at end of file diff --git a/science/requirements.txt b/science/requirements.txt index dbb1c8a..e702a6d 100644 --- a/science/requirements.txt +++ b/science/requirements.txt @@ -1,15 +1,623 @@ -ipython>=8.10 -jupyterlab>=3.0 -kedro~=0.19.6 -kedro-datasets>=3.0; python_version >= "3.9" -kedro-datasets>=1.0; python_version < "3.9" -kedro-datasets[netcdf, rioxarray] -kedro-telemetry>=0.3.1 -kedro-viz>=6.7.0 -notebook -pytest~=7.2 -pytest-cov~=3.0 -pytest-mock>=1.7.1, <2.0 -ruff~=0.1.8 -matplotlib -cartopy \ No newline at end of file +# This file was autogenerated by uv via the following command: +# uv pip compile requirements.in -o requirements.txt --universal +affine==2.4.0 + # via rasterio +aiofiles==24.1.0 + # via kedro-viz +annotated-types==0.7.0 + # via pydantic +antlr4-python3-runtime==4.9.3 + # via omegaconf +anyio==3.7.1 + # via + # httpx + # jupyter-server + # starlette + # watchfiles + # watchgod +appdirs==1.4.4 + # via kedro-telemetry +appnope==0.1.4 ; platform_system == 'Darwin' + # via ipykernel +argon2-cffi==23.1.0 + # via jupyter-server +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +arrow==1.3.0 + # via + # cookiecutter + # isoduration +asttokens==2.4.1 + # via stack-data +async-lru==2.0.4 + # via jupyterlab +attrs==23.2.0 + # via + # jsonschema + # kedro + # rasterio + # referencing +babel==2.15.0 + # via jupyterlab-server +beautifulsoup4==4.12.3 + # via nbconvert +binaryornot==0.4.4 + # via cookiecutter +bleach==6.1.0 + # via nbconvert +build==1.2.1 + # via kedro +cachetools==5.4.0 + # via kedro +cartopy==0.23.0 + # via -r requirements.in +certifi==2024.7.4 + # via + # httpcore + # httpx + # netcdf4 + # pyproj + # rasterio + # requests +cffi==1.16.0 + # via + # argon2-cffi-bindings + # pyzmq +cftime==1.6.4 + # via netcdf4 +chardet==5.2.0 + # via binaryornot +charset-normalizer==3.3.2 + # via requests +click==8.1.7 + # via + # click-default-group + # click-plugins + # cligj + # cookiecutter + 
# kedro + # rasterio + # typer + # uvicorn +click-default-group==1.2.4 + # via kedro-viz +click-plugins==1.1.1 + # via rasterio +cligj==0.7.2 + # via rasterio +colorama==0.4.6 ; os_name == 'nt' or platform_system == 'Windows' or sys_platform == 'win32' + # via + # build + # click + # ipython + # pytest + # uvicorn +comm==0.2.2 + # via ipykernel +contourpy==1.2.1 + # via matplotlib +cookiecutter==2.6.0 + # via kedro +coverage==7.6.0 + # via pytest-cov +cycler==0.12.1 + # via matplotlib +debugpy==1.8.2 + # via ipykernel +decorator==5.1.1 + # via ipython +defusedxml==0.7.1 + # via nbconvert +dnspython==2.6.1 + # via email-validator +dynaconf==3.2.6 + # via kedro +email-validator==2.2.0 + # via fastapi +executing==2.0.1 + # via stack-data +fastapi==0.111.1 + # via kedro-viz +fastapi-cli==0.0.4 + # via fastapi +fastjsonschema==2.20.0 + # via nbformat +fonttools==4.53.1 + # via matplotlib +fqdn==1.5.1 + # via jsonschema +fsspec==2024.6.1 + # via + # kedro + # kedro-viz +gitdb==4.0.11 + # via gitpython +gitpython==3.1.43 + # via kedro +graphql-core==3.2.3 + # via strawberry-graphql +greenlet==3.0.3 ; python_version < '3.13' and (platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64') + # via sqlalchemy +h11==0.14.0 + # via + # httpcore + # uvicorn +h5netcdf==1.3.0 + # via kedro-datasets +h5py==3.11.0 + # via h5netcdf +httpcore==1.0.5 + # via httpx +httptools==0.6.1 + # via uvicorn +httpx==0.27.0 + # via + # fastapi + # jupyterlab +idna==3.7 + # via + # anyio + # email-validator + # httpx + # jsonschema + # requests +imageio==2.34.2 + # via scikit-image +importlib-metadata==7.2.1 + # via kedro +importlib-resources==6.4.0 + # via kedro +iniconfig==2.0.0 + # via pytest +ipykernel==6.29.5 + # via jupyterlab +ipython==8.26.0 + # via + # -r requirements.in + # ipykernel + # kedro-viz +isoduration==20.11.0 + # via 
jsonschema +jedi==0.19.1 + # via ipython +jinja2==3.1.4 + # via + # cookiecutter + # fastapi + # jupyter-server + # jupyterlab + # jupyterlab-server + # nbconvert +json5==0.9.25 + # via jupyterlab-server +jsonpointer==3.0.0 + # via jsonschema +jsonschema==4.23.0 + # via + # jupyter-events + # jupyterlab-server + # nbformat +jsonschema-specifications==2023.12.1 + # via jsonschema +jupyter-client==8.6.2 + # via + # ipykernel + # jupyter-server + # nbclient +jupyter-core==5.7.2 + # via + # ipykernel + # jupyter-client + # jupyter-server + # jupyterlab + # nbclient + # nbconvert + # nbformat +jupyter-events==0.10.0 + # via jupyter-server +jupyter-lsp==2.2.5 + # via jupyterlab +jupyter-server==2.14.2 + # via + # jupyter-lsp + # jupyterlab + # jupyterlab-server + # notebook + # notebook-shim +jupyter-server-terminals==0.5.3 + # via jupyter-server +jupyterlab==4.2.4 + # via + # -r requirements.in + # notebook +jupyterlab-pygments==0.3.0 + # via nbconvert +jupyterlab-server==2.27.3 + # via + # jupyterlab + # notebook +kedro==0.19.6 + # via + # -r requirements.in + # kedro-datasets + # kedro-telemetry + # kedro-viz +kedro-datasets==4.0.0 + # via -r requirements.in +kedro-telemetry==0.5.0 + # via -r requirements.in +kedro-viz==9.1.0 + # via -r requirements.in +kiwisolver==1.4.5 + # via matplotlib +lazy-loader==0.4 + # via + # kedro-datasets + # scikit-image +markdown-it-py==3.0.0 + # via rich +markupsafe==2.1.5 + # via + # jinja2 + # nbconvert +matplotlib==3.9.1 + # via + # -r requirements.in + # cartopy +matplotlib-inline==0.1.7 + # via + # ipykernel + # ipython +mdurl==0.1.2 + # via markdown-it-py +mistune==3.0.2 + # via nbconvert +more-itertools==10.3.0 + # via kedro +nbclient==0.10.0 + # via nbconvert +nbconvert==7.16.4 + # via jupyter-server +nbformat==5.10.4 + # via + # jupyter-server + # nbclient + # nbconvert +nest-asyncio==1.6.0 + # via ipykernel +netcdf4==1.7.1.post1 + # via kedro-datasets +networkx==3.3 + # via + # kedro-viz + # scikit-image +notebook==7.2.1 + # 
via -r requirements.in +notebook-shim==0.2.4 + # via + # jupyterlab + # notebook +numpy==2.0.1 + # via + # cartopy + # cftime + # contourpy + # h5py + # imageio + # matplotlib + # netcdf4 + # opencv-python + # pandas + # rasterio + # rioxarray + # scikit-image + # scipy + # shapely + # snuggs + # tifffile + # xarray +omegaconf==2.3.0 + # via kedro +opencv-python==4.10.0.84 + # via -r requirements.in +orjson==3.10.6 + # via kedro-viz +overrides==7.7.0 + # via jupyter-server +packaging==23.2 + # via + # build + # cartopy + # h5netcdf + # ipykernel + # jupyter-server + # jupyterlab + # jupyterlab-server + # kedro-viz + # lazy-loader + # matplotlib + # nbconvert + # plotly + # pytest + # pytoolconfig + # rioxarray + # scikit-image + # xarray +pandas==2.2.2 + # via + # kedro-viz + # xarray +pandocfilters==1.5.1 + # via nbconvert +parse==1.20.2 + # via kedro +parso==0.8.4 + # via jedi +pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32' + # via ipython +pillow==10.4.0 + # via + # -r requirements.in + # imageio + # matplotlib + # scikit-image +platformdirs==4.2.2 + # via + # jupyter-core + # pytoolconfig +plotly==5.23.0 + # via kedro-viz +pluggy==1.5.0 + # via + # kedro + # pytest +pre-commit-hooks==4.6.0 + # via kedro +prometheus-client==0.20.0 + # via jupyter-server +prompt-toolkit==3.0.47 + # via ipython +psutil==6.0.0 + # via ipykernel +ptyprocess==0.7.0 ; os_name != 'nt' or (sys_platform != 'emscripten' and sys_platform != 'win32') + # via + # pexpect + # terminado +pure-eval==0.2.3 + # via stack-data +pycparser==2.22 + # via cffi +pydantic==2.8.2 + # via + # fastapi + # kedro-viz +pydantic-core==2.20.1 + # via pydantic +pygments==2.18.0 + # via + # ipython + # nbconvert + # rich +pyparsing==3.1.2 + # via + # matplotlib + # snuggs +pyproj==3.6.1 + # via + # cartopy + # rioxarray +pyproject-hooks==1.1.0 + # via build +pyshp==2.3.1 + # via cartopy +pytest==7.4.4 + # via + # -r requirements.in + # pytest-cov + # pytest-mock +pytest-cov==3.0.0 + # 
via -r requirements.in +pytest-mock==1.13.0 + # via -r requirements.in +python-dateutil==2.9.0.post0 + # via + # arrow + # jupyter-client + # matplotlib + # pandas + # strawberry-graphql +python-dotenv==1.0.1 + # via uvicorn +python-json-logger==2.0.7 + # via jupyter-events +python-multipart==0.0.9 + # via fastapi +python-slugify==8.0.4 + # via cookiecutter +pytoolconfig==1.3.1 + # via rope +pytz==2024.1 + # via pandas +pywin32==306 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32' + # via jupyter-core +pywinpty==2.0.13 ; os_name == 'nt' + # via + # jupyter-server + # jupyter-server-terminals + # terminado +pyyaml==6.0.1 + # via + # cookiecutter + # jupyter-events + # kedro + # omegaconf + # uvicorn +pyzmq==26.0.3 + # via + # ipykernel + # jupyter-client + # jupyter-server +rasterio==1.3.10 + # via rioxarray +referencing==0.35.1 + # via + # jsonschema + # jsonschema-specifications + # jupyter-events +requests==2.32.3 + # via + # cookiecutter + # jupyterlab-server + # kedro-telemetry +rfc3339-validator==0.1.4 + # via + # jsonschema + # jupyter-events +rfc3986-validator==0.1.1 + # via + # jsonschema + # jupyter-events +rich==13.7.1 + # via + # cookiecutter + # kedro + # typer +rioxarray==0.17.0 + # via kedro-datasets +rope==1.13.0 + # via kedro +rpds-py==0.19.1 + # via + # jsonschema + # referencing +ruamel-yaml==0.18.6 + # via pre-commit-hooks +ruamel-yaml-clib==0.2.8 ; python_version < '3.13' and platform_python_implementation == 'CPython' + # via ruamel-yaml +ruff==0.1.15 + # via -r requirements.in +scikit-image==0.24.0 + # via -r requirements.in +scipy==1.14.0 + # via scikit-image +secure==0.3.0 + # via kedro-viz +send2trash==1.8.3 + # via jupyter-server +setuptools==71.1.0 + # via + # jupyterlab + # rasterio +shapely==2.0.5 + # via cartopy +shellingham==1.5.4 + # via typer +six==1.16.0 + # via + # asttokens + # bleach + # python-dateutil + # rfc3339-validator +smmap==5.0.1 + # via gitdb +sniffio==1.3.1 + # via + # anyio + # httpx 
+snuggs==1.4.7 + # via rasterio +soupsieve==2.5 + # via beautifulsoup4 +sqlalchemy==2.0.31 + # via kedro-viz +stack-data==0.6.3 + # via ipython +starlette==0.37.2 + # via fastapi +strawberry-graphql==0.237.2 + # via kedro-viz +tenacity==8.5.0 + # via plotly +terminado==0.18.1 + # via + # jupyter-server + # jupyter-server-terminals +text-unidecode==1.3 + # via python-slugify +tifffile==2024.7.24 + # via scikit-image +tinycss2==1.3.0 + # via nbconvert +toml==0.10.2 + # via kedro +toposort==1.10 + # via kedro-viz +tornado==6.4.1 + # via + # ipykernel + # jupyter-client + # jupyter-server + # jupyterlab + # notebook + # terminado +traitlets==5.14.3 + # via + # comm + # ipykernel + # ipython + # jupyter-client + # jupyter-core + # jupyter-events + # jupyter-server + # jupyterlab + # matplotlib-inline + # nbclient + # nbconvert + # nbformat +typer==0.12.3 + # via fastapi-cli +types-python-dateutil==2.9.0.20240316 + # via arrow +typing-extensions==4.12.2 + # via + # fastapi + # ipython + # pydantic + # pydantic-core + # sqlalchemy + # strawberry-graphql + # typer +tzdata==2024.1 + # via pandas +uri-template==1.3.0 + # via jsonschema +urllib3==2.2.2 + # via requests +uvicorn==0.29.0 + # via + # fastapi + # kedro-viz +uvloop==0.19.0 ; platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32' + # via uvicorn +watchfiles==0.22.0 + # via uvicorn +watchgod==0.8.2 + # via kedro-viz +wcwidth==0.2.13 + # via prompt-toolkit +webcolors==24.6.0 + # via jsonschema +webencodings==0.5.1 + # via + # bleach + # tinycss2 +websocket-client==1.8.0 + # via jupyter-server +websockets==12.0 + # via uvicorn +xarray==2024.6.0 + # via + # kedro-datasets + # rioxarray +zipp==3.19.2 + # via importlib-metadata diff --git a/science/src/science/pipelines/globe/__init__.py b/science/src/science/pipelines/lowvshigh/__init__.py similarity index 100% rename from science/src/science/pipelines/globe/__init__.py rename to 
science/src/science/pipelines/lowvshigh/__init__.py diff --git a/science/src/science/pipelines/globe/nodes.py b/science/src/science/pipelines/lowvshigh/nodes.py similarity index 97% rename from science/src/science/pipelines/globe/nodes.py rename to science/src/science/pipelines/lowvshigh/nodes.py index 95290cd..4eda981 100644 --- a/science/src/science/pipelines/globe/nodes.py +++ b/science/src/science/pipelines/lowvshigh/nodes.py @@ -3,7 +3,6 @@ generated using Kedro 0.19.6 """ import logging -from re import I from typing import Any, Callable import cartopy # noqa: F401 @@ -14,7 +13,6 @@ import xarray as xr from kedro_datasets.video.video_dataset import SequenceVideo from PIL import Image -from rasterio.plot import reshape_as_image from skimage.transform import rescale log = logging.getLogger(__name__) diff --git a/science/src/science/pipelines/globe/pipeline.py b/science/src/science/pipelines/lowvshigh/pipeline.py similarity index 98% rename from science/src/science/pipelines/globe/pipeline.py rename to science/src/science/pipelines/lowvshigh/pipeline.py index c39ee72..f35a5bf 100644 --- a/science/src/science/pipelines/globe/pipeline.py +++ b/science/src/science/pipelines/lowvshigh/pipeline.py @@ -2,12 +2,10 @@ This is a boilerplate pipeline 'globe_compact' generated using Kedro 0.19.6 """ -from pydoc import cli - from kedro.pipeline import Pipeline, node from kedro.pipeline.modular_pipeline import pipeline -from science.pipelines.globe.nodes import ( +from science.pipelines.lowvshigh.nodes import ( clip_to_boundary, georef_nextgems_dataset, parts_to_video,