diff --git a/data/.editorconfig b/data/.editorconfig
new file mode 100644
index 00000000..9d000a84
--- /dev/null
+++ b/data/.editorconfig
@@ -0,0 +1,33 @@
+# http://editorconfig.org
+
+root = true
+
+[*]
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace = true
+insert_final_newline = true
+charset = utf-8
+end_of_line = lf
+
+[*.bat]
+indent_style = tab
+end_of_line = crlf
+
+[LICENSE]
+insert_final_newline = false
+
+[Makefile]
+indent_style = tab
+
+[*.py]
+profile = black
+
+[*.md]
+trim_trailing_whitespace = false
+
+[*.yml]
+indent_size = 2
+
+[*.yaml]
+indent_size = 2
diff --git a/data/.gitignore b/data/.gitignore
new file mode 100644
index 00000000..bcfecfb5
--- /dev/null
+++ b/data/.gitignore
@@ -0,0 +1,92 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# DotEnv configuration
+.env
+*/*.env
+
+# Database
+*.db
+*.rdb
+
+# Pycharm
+.idea
+
+# VS Code
+.vscode/
+
+# Spyder
+.spyproject/
+
+# Jupyter NB Checkpoints
+.ipynb_checkpoints/
+
+# exclude data from source control by default
+/data/
+
+# Mac OS-specific storage files
+.DS_Store
+
+# vim
+*.swp
+*.swo
+
+# Mypy cache
+.mypy_cache/
+
+.tox
diff --git a/data/.pre-commit-config.yaml b/data/.pre-commit-config.yaml
new file mode 100644
index 00000000..0effb148
--- /dev/null
+++ b/data/.pre-commit-config.yaml
@@ -0,0 +1,35 @@
+repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v2.3.0
+ hooks:
+ - id: check-yaml
+ - id: end-of-file-fixer
+ - id: trailing-whitespace
+
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.0.275
+ hooks:
+ - id: ruff
+ args: [ "--line-length=100", "--select=E,F,N"]
+
+ - repo: https://github.com/psf/black
+ rev: 23.3.0
+ hooks:
+ - id: black
+ args: [ "--line-length=100" ]
+
+ - repo: https://github.com/nbQA-dev/nbQA
+ rev: 1.7.0
+ hooks:
+ - id: nbqa-black
+ args: [ "--line-length=100", "--check"]
+ - id: nbqa-isort
+ args: [ "--float-to-top", "--profile=black", "--check-only"]
+ - id: nbqa-ruff
+ args: [ "--line-length=100" , "--select=E,F,N"]
+
+ # check for private keys and passwords!
+ - repo: https://github.com/gitleaks/gitleaks
+ rev: v8.16.1
+ hooks:
+ - id: gitleaks
diff --git a/data/CHANGELOG.md b/data/CHANGELOG.md
new file mode 100644
index 00000000..ef74f8c1
--- /dev/null
+++ b/data/CHANGELOG.md
@@ -0,0 +1,19 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+
+## v0.1.0
+
+Unreleased
+
+### Added
+
+### Changed
+
+### Fixed
+
+### Removed
diff --git a/data/Dockerfile b/data/Dockerfile
new file mode 100644
index 00000000..72f9c258
--- /dev/null
+++ b/data/Dockerfile
@@ -0,0 +1,17 @@
+FROM mambaorg/micromamba:1.4.4
+
+COPY --chown=$MAMBA_USER:$MAMBA_USER environment.yml /tmp/environment.yml
+
+RUN micromamba install -n base --yes --file /tmp/environment.yml && \
+ micromamba clean --all --yes
+
+# TODO: export .env in here
+
+# Otherwise python will not be found
+ARG MAMBA_DOCKERFILE_ACTIVATE=1
+
+# Jupyter with Docker Compose
+EXPOSE 8888
+WORKDIR /home/$MAMBA_USER
+
+ENTRYPOINT ["/usr/local/bin/_entrypoint.sh", "jupyter", "lab", "--ip=0.0.0.0","--allow-root", "--no-browser"]
diff --git a/data/LICENSE b/data/LICENSE
new file mode 100644
index 00000000..e08d094b
--- /dev/null
+++ b/data/LICENSE
@@ -0,0 +1,8 @@
+The MIT License (MIT)
+Copyright (c) 2023, Vizzuality
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/data/README.md b/data/README.md
new file mode 100644
index 00000000..276871f8
--- /dev/null
+++ b/data/README.md
@@ -0,0 +1,65 @@
+skytruth_30x30
+==============================
+
+Develop an MVP platform that tracks each country's progress towards the global goal of protecting 30% of the oceans by 2030.
+
+--------
+
+## Setup
+
+### The environment
+To run the notebooks you need to create an environment with the dependencies. There are two options:
+#### Docker
+
+If you have [Docker](https://docs.docker.com/engine/install/) installed on your system,
+you can run a JupyterLab server with:
+
+``` bash
+docker compose up --build
+```
+
+If you want to get a shell inside the container, use a terminal in JupyterLab,
+use VS Code remote development, or run this command:
+
+```shell
+docker exec -it skytruth_notebooks /bin/bash
+```
+
+#### Conda environment
+
+Create the environment with:
+
+``` bash
+mamba env create -n skytruth_30x30 -f environment.yml
+```
+This will create an environment called skytruth_30x30 with a common set of dependencies.
+
+### `git` (if needed) and pre-commit hooks
+
+If this project is new and standalone (not a module in a bigger project), you need to initialize git:
+
+``` bash
+git init
+```
+
+If the project is already in a git repository, you can skip this step.
+
+To install the **pre-commit hooks**, with the environment activated and in the project root directory, run:
+
+``` bash
+pre-commit install
+```
+
+## Update the environment
+
+If you need to update the environment by installing a new package, run:
+
+``` bash
+mamba install [package] # or `pip install [package]` if you want to install it via pip
+```
+
+Then export the updated environment to the `environment.yml` file, so that others can reproduce it, with:
+
+``` bash
+mamba env export --no-builds -f environment.yml
+```
diff --git a/data/docker-compose.yml b/data/docker-compose.yml
new file mode 100644
index 00000000..d2ffda85
--- /dev/null
+++ b/data/docker-compose.yml
@@ -0,0 +1,14 @@
+version: '3.8'
+services:
+ skytruth-notebooks:
+ build:
+ context: .
+ volumes:
+ - ./data:/home/mambauser/data
+ - ./notebooks:/home/mambauser/notebooks
+ - ./src:/home/mambauser/src
+ ports:
+ - 8888:8888
+ - 8787:8787 # dask dashboard
+ container_name: skytruth_notebooks
+ env_file: .env
diff --git a/data/environment.yml b/data/environment.yml
new file mode 100644
index 00000000..b4814d8e
--- /dev/null
+++ b/data/environment.yml
@@ -0,0 +1,448 @@
+channels:
+ - conda-forge
+dependencies:
+ - affine=2.4.0
+ - aiobotocore=2.5.0
+ - aiohttp=3.8.4
+ - aioitertools=0.11.0
+ - aiosignal=1.3.1
+ - alembic=1.11.1
+ - anyio=3.7.0
+ - appdirs=1.4.4
+ - argon2-cffi=21.3.0
+ - argon2-cffi-bindings=21.2.0
+ - asttokens=2.2.1
+ - async-lru=2.0.2
+ - async-timeout=4.0.2
+ - async_generator=1.10
+ - attrs=23.1.0
+ - autopep8=2.0.2
+ - babel=2.12.1
+ - backcall=0.2.0
+ - backports=1.0
+ - backports.functools_lru_cache=1.6.5
+ - beautifulsoup4=4.12.2
+ - black=23.3.0
+ - bleach=6.0.0
+ - blinker=1.6.2
+ - blosc=1.21.4
+ - bokeh=3.2.0
+ - boost-cpp=1.78.0
+ - botocore=1.29.76
+ - branca=0.6.0
+ - brotli=1.0.9
+ - brotli-bin=1.0.9
+ - brotlipy=0.7.0
+ - bzip2=1.0.8
+ - c-ares=1.19.1
+ - ca-certificates=2023.5.7
+ - cachecontrol=0.13.0
+ - cachecontrol-with-filecache=0.13.0
+ - cached-property=1.5.2
+ - cached_property=1.5.2
+ - cachetools=5.3.0
+ - cachy=0.3.0
+ - cairo=1.16.0
+ - cartopy=0.21.1
+ - certifi=2023.5.7
+ - certipy=0.1.3
+ - cffi=1.15.1
+ - cfgv=3.3.1
+ - cfitsio=4.2.0
+ - cftime=1.6.2
+ - charset-normalizer=3.1.0
+ - click=8.1.3
+ - click-default-group=1.2.2
+ - click-plugins=1.1.1
+ - cligj=0.7.2
+ - clikit=0.6.2
+ - cloudpickle=2.2.1
+ - color-operations=0.1.1
+ - comm=0.1.3
+ - conda-lock=2.1.0
+ - configurable-http-proxy=4.5.4
+ - contourpy=1.1.0
+ - crashtest=0.4.1
+ - cryptography=41.0.1
+ - curl=8.1.2
+ - cycler=0.11.0
+ - cython=0.29.35
+ - cytoolz=0.12.0
+ - dask=2023.6.1
+ - dask-cloudprovider=2022.10.0
+ - dask-core=2023.6.1
+ - dbus=1.13.6
+ - debugpy=1.6.7
+ - decorator=5.1.1
+ - defusedxml=0.7.1
+ - distlib=0.3.6
+ - distributed=2023.6.1
+ - earthengine-api=0.1.357
+ - ensureconda=1.4.3
+ - entrypoints=0.4
+ - exceptiongroup=1.1.1
+ - executing=1.2.0
+ - expat=2.5.0
+ - filelock=3.12.2
+ - fiona=1.9.4
+ - flit-core=3.9.0
+ - folium=0.14.0
+ - font-ttf-dejavu-sans-mono=2.37
+ - font-ttf-inconsolata=3.000
+ - font-ttf-source-code-pro=2.038
+ - font-ttf-ubuntu=0.83
+ - fontconfig=2.14.2
+ - fonts-conda-ecosystem=1
+ - fonts-conda-forge=1
+ - fonttools=4.40.0
+ - freetype=2.12.1
+ - freexl=1.0.6
+ - frozenlist=1.3.3
+ - fsspec=2023.6.0
+ - gcsfs=2023.6.0
+ - gdal=3.7.0
+ - geopandas=0.13.2
+ - geopandas-base=0.13.2
+ - geos=3.11.2
+ - geotiff=1.7.1
+ - gettext=0.21.1
+ - gflags=2.2.2
+ - giflib=5.2.1
+ - gitdb=4.0.10
+ - gitpython=3.1.31
+ - glib=2.76.3
+ - glib-tools=2.76.3
+ - glog=0.6.0
+ - gmp=6.2.1
+ - google-api-core=2.11.1
+ - google-api-python-client=2.91.0
+ - google-auth=2.21.0
+ - google-auth-httplib2=0.1.0
+ - google-auth-oauthlib=1.0.0
+ - google-cloud-core=2.3.2
+ - google-cloud-storage=2.10.0
+ - google-crc32c=1.1.2
+ - google-resumable-media=2.5.0
+ - googleapis-common-protos=1.59.1
+ - graphite2=1.3.13
+ - greenlet=2.0.2
+ - grpcio=1.54.2
+ - gst-plugins-base=1.22.4
+ - gstreamer=1.22.4
+ - h11=0.14.0
+ - h2=4.1.0
+ - h5netcdf=1.2.0
+ - h5py=3.9.0
+ - harfbuzz=7.3.0
+ - hdf4=4.2.15
+ - hdf5=1.14.1
+ - hpack=4.0.0
+ - html5lib=1.1
+ - httpcore=0.17.2
+ - httplib2=0.22.0
+ - httpx=0.24.1
+ - hyperframe=6.0.1
+ - icu=72.1
+ - identify=2.5.24
+ - idna=3.4
+ - importlib-metadata=6.7.0
+ - importlib-resources=5.12.0
+ - importlib_metadata=6.7.0
+ - importlib_resources=5.12.0
+ - ipykernel=6.23.3
+ - ipython=8.14.0
+ - ipython_genutils=0.2.0
+ - ipywidgets=8.0.6
+ - isort=5.12.0
+ - jaraco.classes=3.2.3
+ - jedi=0.18.2
+ - jeepney=0.8.0
+ - jinja2=3.1.2
+ - jmespath=1.0.1
+ - joblib=1.3.0
+ - json-c=0.16
+ - json5=0.9.5
+ - jsonschema=4.17.3
+ - jupyter-lsp=2.2.0
+ - jupyter_client=8.3.0
+ - jupyter_core=5.3.1
+ - jupyter_events=0.6.3
+ - jupyter_server=2.7.0
+ - jupyter_server_terminals=0.4.4
+ - jupyter_telemetry=0.1.0
+ - jupyterhub=4.0.1
+ - jupyterhub-base=4.0.1
+ - jupyterlab=4.0.2
+ - jupyterlab_pygments=0.2.2
+ - jupyterlab_server=2.23.0
+ - jupyterlab_widgets=3.0.7
+ - kealib=1.5.1
+ - keyring=24.2.0
+ - kiwisolver=1.4.4
+ - krb5=1.20.1
+ - lame=3.100
+ - lcms2=2.15
+ - ld_impl_linux-64=2.40
+ - lerc=4.0.0
+ - libaec=1.0.6
+ - libarchive=3.6.2
+ - libarrow=12.0.1
+ - libblas=3.9.0
+ - libbrotlicommon=1.0.9
+ - libbrotlidec=1.0.9
+ - libbrotlienc=1.0.9
+ - libcblas=3.9.0
+ - libclang=15.0.7
+ - libclang13=15.0.7
+ - libcrc32c=1.1.2
+ - libcurl=8.1.2
+ - libdeflate=1.18
+ - libedit=3.1.20191231
+ - libev=4.33
+ - libevent=2.1.12
+ - libexpat=2.5.0
+ - libffi=3.4.2
+ - libflac=1.4.3
+ - libgcrypt=1.10.1
+ - libgdal=3.7.0
+ - libglib=2.76.3
+ - libgoogle-cloud=2.12.0
+ - libgpg-error=1.47
+ - libgrpc=1.54.2
+ - libiconv=1.17
+ - libjpeg-turbo=2.1.5.1
+ - libkml=1.3.0
+ - liblapack=3.9.0
+ - libllvm15=15.0.7
+ - libnetcdf=4.9.2
+ - libnghttp2=1.52.0
+ - libogg=1.3.4
+ - libopenblas=0.3.23
+ - libopus=1.3.1
+ - libpng=1.6.39
+ - libpq=15.3
+ - librttopo=1.1.0
+ - libsndfile=1.2.0
+ - libsodium=1.0.18
+ - libspatialindex=1.9.3
+ - libspatialite=5.0.1
+ - libsqlite=3.42.0
+ - libssh2=1.11.0
+ - libthrift=0.18.1
+ - libtiff=4.5.1
+ - libutf8proc=2.8.0
+ - libuuid=2.38.1
+ - libuv=1.44.2
+ - libvorbis=1.3.7
+ - libwebp-base=1.3.1
+ - libxcb=1.15
+ - libxml2=2.11.4
+ - libzip=1.9.2
+ - libzlib=1.2.13
+ - locket=1.0.0
+ - lockfile=0.12.2
+ - lz4=4.3.2
+ - lz4-c=1.9.4
+ - lzo=2.10
+ - mako=1.2.4
+ - mapclassify=2.5.0
+ - markdown-it-py=3.0.0
+ - markupsafe=2.1.3
+ - matplotlib=3.7.1
+ - matplotlib-base=3.7.1
+ - matplotlib-inline=0.1.6
+ - mdurl=0.1.0
+ - mercantile=1.2.1
+ - metpy=1.5.0
+ - mistune=3.0.0
+ - more-itertools=9.1.0
+ - morecantile=4.2.0
+ - mpg123=1.31.3
+ - msgpack-python=1.0.5
+ - multidict=6.0.4
+ - munch=3.0.0
+ - munkres=1.1.4
+ - mypy_extensions=1.0.0
+ - mysql-common=8.0.33
+ - mysql-libs=8.0.33
+ - nbclassic=1.0.0
+ - nbclient=0.8.0
+ - nbconvert=7.6.0
+ - nbconvert-core=7.6.0
+ - nbconvert-pandoc=7.6.0
+ - nbformat=5.9.0
+ - nbqa=1.7.0
+ - ncurses=6.4
+ - nest-asyncio=1.5.6
+ - netcdf4=1.6.4
+ - networkx=3.1
+ - nodeenv=1.8.0
+ - nodejs=18.16.1
+ - nomkl=1.0
+ - notebook=6.5.4
+ - notebook-shim=0.2.3
+ - nspr=4.35
+ - nss=3.89
+ - numexpr=2.8.4
+ - numpy=1.25.0
+ - oauthlib=3.2.2
+ - openjpeg=2.5.0
+ - openssl=3.1.1
+ - overrides=7.3.1
+ - packaging=23.1
+ - pamela=1.0.0
+ - pandas=2.0.3
+ - pandoc=3.1.3
+ - pandocfilters=1.5.0
+ - parso=0.8.3
+ - partd=1.4.0
+ - pastel=0.2.1
+ - pathspec=0.11.1
+ - patsy=0.5.3
+ - pcre2=10.40
+ - pexpect=4.8.0
+ - pickleshare=0.7.5
+ - pillow=9.5.0
+ - pint=0.22
+ - pip=23.1.2
+ - pixman=0.40.0
+ - pkginfo=1.9.6
+ - pkgutil-resolve-name=1.3.10
+ - platformdirs=3.8.0
+ - ply=3.11
+ - pooch=1.7.0
+ - poppler=23.05.0
+ - poppler-data=0.4.12
+ - postgresql=15.3
+ - pre-commit=3.3.3
+ - proj=9.2.1
+ - prometheus_client=0.17.0
+ - prompt-toolkit=3.0.38
+ - prompt_toolkit=3.0.38
+ - protobuf=4.21.12
+ - psutil=5.9.5
+ - pthread-stubs=0.4
+ - ptyprocess=0.7.0
+ - pure_eval=0.2.2
+ - pyasn1=0.4.8
+ - pyasn1-modules=0.2.7
+ - pycodestyle=2.10.0
+ - pycparser=2.21
+ - pycurl=7.45.1
+ - pydantic=1.10.9
+ - pygments=2.15.1
+ - pyjwt=2.7.0
+ - pylev=1.4.0
+ - pyopenssl=23.2.0
+ - pyparsing=3.1.0
+ - pyproj=3.6.0
+ - pyqt=5.15.7
+ - pyqt5-sip=12.11.0
+ - pyrsistent=0.19.3
+ - pyshp=2.3.1
+ - pysocks=1.7.1
+ - pystac=1.7.3
+ - python=3.11.4
+ - python-dateutil=2.8.2
+ - python-dotenv=1.0.0
+ - python-fastjsonschema=2.17.1
+ - python-json-logger=2.0.7
+ - python-tzdata=2023.3
+ - python_abi=3.11
+ - pytz=2023.3
+ - pyu2f=0.1.5
+ - pyyaml=6.0
+ - pyzmq=25.1.0
+ - qt-main=5.15.8
+ - rasterio=1.3.7
+ - re2=2023.03.02
+ - readline=8.2
+ - requests=2.31.0
+ - requests-oauthlib=1.3.1
+ - rfc3339-validator=0.1.4
+ - rfc3986-validator=0.1.1
+ - rich=13.4.2
+ - rio-cogeo=2.0.1
+ - rio-tiler=5.0.1
+ - rioxarray=0.14.1
+ - rsa=4.9
+ - rtree=1.0.1
+ - ruamel.yaml=0.17.32
+ - ruamel.yaml.clib=0.2.7
+ - ruff=0.0.275
+ - s2n=1.3.45
+ - s3fs=2023.6.0
+ - scikit-learn=1.2.2
+ - scipy=1.11.1
+ - seaborn=0.12.2
+ - seaborn-base=0.12.2
+ - send2trash=1.8.2
+ - setuptools=68.0.0
+ - shapely=2.0.1
+ - sip=6.7.9
+ - six=1.16.0
+ - smmap=3.0.5
+ - snappy=1.1.10
+ - sniffio=1.3.0
+ - snuggs=1.4.7
+ - sortedcontainers=2.4.0
+ - soupsieve=2.3.2.post1
+ - sqlalchemy=2.0.17
+ - sqlite=3.42.0
+ - stack_data=0.6.2
+ - statsmodels=0.14.0
+ - supermercado=0.2.0
+ - tblib=1.7.0
+ - terminado=0.17.1
+ - threadpoolctl=3.1.0
+ - tiledb=2.13.2
+ - tinycss2=1.2.1
+ - tk=8.6.12
+ - tokenize-rt=5.1.0
+ - toml=0.10.2
+ - tomli=2.0.1
+ - tomlkit=0.11.8
+ - toolz=0.12.0
+ - tornado=6.3.2
+ - traitlets=5.9.0
+ - typing-extensions=4.7.0
+ - typing_extensions=4.7.0
+ - typing_utils=0.1.0
+ - tzcode=2023c
+ - tzdata=2023c
+ - ukkonen=1.0.1
+ - uritemplate=4.1.1
+ - urllib3=1.26.15
+ - virtualenv=20.23.1
+ - watchdog=3.0.0
+ - wcwidth=0.2.6
+ - webencodings=0.5.1
+ - websocket-client=1.6.1
+ - wheel=0.40.0
+ - widgetsnbextension=4.0.7
+ - wrapt=1.15.0
+ - xarray=2023.6.0
+ - xerces-c=3.2.4
+ - xorg-kbproto=1.0.7
+ - xorg-libice=1.1.1
+ - xorg-libsm=1.2.4
+ - xorg-libx11=1.8.6
+ - xorg-libxau=1.0.11
+ - xorg-libxdmcp=1.1.3
+ - xorg-libxext=1.3.4
+ - xorg-libxrender=0.9.10
+ - xorg-renderproto=0.11.1
+ - xorg-xextproto=7.3.0
+ - xorg-xf86vidmodeproto=2.3.1
+ - xorg-xproto=7.0.31
+ - xyzservices=2023.5.0
+ - xz=5.2.6
+ - yaml=0.2.5
+ - yarl=1.9.2
+ - zeromq=4.3.4
+ - zict=3.0.0
+ - zipp=3.15.0
+ - zlib=1.2.13
+ - zstd=1.5.2
+ - pip:
+ - jupyterlab-code-formatter==2.2.1
diff --git a/data/notebooks/MPAtlas_table.ipynb b/data/notebooks/MPAtlas_table.ipynb
new file mode 100644
index 00000000..40a2613b
--- /dev/null
+++ b/data/notebooks/MPAtlas_table.ipynb
@@ -0,0 +1,717 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Set up"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import geopandas as gpd\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n",
+ "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed/mpatlas\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### MPAtlas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read data\n",
+ "mpatlas = gpd.read_file(path_in + \"/MPAtlas_largest100.shp\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['OBJECTID', 'WDPAID', 'WDPA_PID', 'NAME', 'English_De', 'PARENT_ISO',\n",
+ " 'ISO3', 'MPA_Marine', 'mpa_id', 'Zone_Marin', 'IUCN_Cat', 'Stage_of_E',\n",
+ " 'Distant_MP', 'Level_of_P', 'Most_Impac', 'Descrip_Im', 'Vertically',\n",
+ " 'SHAPE_Leng', 'SHAPE_Area', 'geometry'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mpatlas.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['Incompatible', 'Highly', 'TBD', 'Fully', 'Lightly', 'Unknown'],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mpatlas.Level_of_P.unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " OBJECTID | \n",
+ " WDPAID | \n",
+ " WDPA_PID | \n",
+ " NAME | \n",
+ " English_De | \n",
+ " PARENT_ISO | \n",
+ " ISO3 | \n",
+ " MPA_Marine | \n",
+ " mpa_id | \n",
+ " Zone_Marin | \n",
+ " ... | \n",
+ " Stage_of_E | \n",
+ " Distant_MP | \n",
+ " Level_of_P | \n",
+ " Most_Impac | \n",
+ " Descrip_Im | \n",
+ " Vertically | \n",
+ " SHAPE_Leng | \n",
+ " SHAPE_Area | \n",
+ " geometry | \n",
+ " P_LEVEL | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1.0 | \n",
+ " 478053.0 | \n",
+ " 478053 | \n",
+ " Hikurangi Deep | \n",
+ " Benthic Protection Area | \n",
+ " NZL | \n",
+ " NZL | \n",
+ " 54022.1 | \n",
+ " 5258 | \n",
+ " 54022.1 | \n",
+ " ... | \n",
+ " Implemented | \n",
+ " NaN | \n",
+ " Incompatible | \n",
+ " Mining, Fishing | \n",
+ " Benthic protections only. Deep sea mining allo... | \n",
+ " X | \n",
+ " 12.332952 | \n",
+ " 5.833001 | \n",
+ " POLYGON ((-175.00000 -42.16661, -175.00000 -42... | \n",
+ " Less Protected / Unknown | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2.0 | \n",
+ " 555512062.0 | \n",
+ " 555512062 | \n",
+ " Kermadec | \n",
+ " Benthic Protection Area | \n",
+ " NZL | \n",
+ " NZL | \n",
+ " 619146.0 | \n",
+ " 5428 | \n",
+ " 458540.5 | \n",
+ " ... | \n",
+ " Implemented | \n",
+ " NaN | \n",
+ " Incompatible | \n",
+ " Mining, Fishing | \n",
+ " Benthic protections only. Deep sea mining allo... | \n",
+ " NaN | \n",
+ " 25.629352 | \n",
+ " 42.963159 | \n",
+ " POLYGON ((-174.02370 -29.22191, -174.02370 -29... | \n",
+ " Less Protected / Unknown | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 21 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " OBJECTID WDPAID WDPA_PID NAME English_De \\\n",
+ "0 1.0 478053.0 478053 Hikurangi Deep Benthic Protection Area \n",
+ "1 2.0 555512062.0 555512062 Kermadec Benthic Protection Area \n",
+ "\n",
+ " PARENT_ISO ISO3 MPA_Marine mpa_id Zone_Marin ... Stage_of_E Distant_MP \\\n",
+ "0 NZL NZL 54022.1 5258 54022.1 ... Implemented NaN \n",
+ "1 NZL NZL 619146.0 5428 458540.5 ... Implemented NaN \n",
+ "\n",
+ " Level_of_P Most_Impac \\\n",
+ "0 Incompatible Mining, Fishing \n",
+ "1 Incompatible Mining, Fishing \n",
+ "\n",
+ " Descrip_Im Vertically SHAPE_Leng \\\n",
+ "0 Benthic protections only. Deep sea mining allo... X 12.332952 \n",
+ "1 Benthic protections only. Deep sea mining allo... NaN 25.629352 \n",
+ "\n",
+ " SHAPE_Area geometry \\\n",
+ "0 5.833001 POLYGON ((-175.00000 -42.16661, -175.00000 -42... \n",
+ "1 42.963159 POLYGON ((-174.02370 -29.22191, -174.02370 -29... \n",
+ "\n",
+ " P_LEVEL \n",
+ "0 Less Protected / Unknown \n",
+ "1 Less Protected / Unknown \n",
+ "\n",
+ "[2 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Create new column with protection level reclassified\n",
+ "def map_protection_level(value):\n",
+ " if value in [\"Fully\", \"Highly\"]:\n",
+ " return \"Fully / Highly Protected\"\n",
+ " else:\n",
+ " return \"Less Protected / Unknown\"\n",
+ "\n",
+ "# Create the P_LEVEL column from the Level_of_P column\n",
+ "mpatlas['P_LEVEL'] = mpatlas['Level_of_P'].apply(map_protection_level)\n",
+ "mpatlas.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
+ "regions_data = [\n",
+ " {\n",
+ " 'region_iso': 'AS',\n",
+ " 'region_name': 'Asia & Pacific',\n",
+ " 'country_iso_3s': [\n",
+ " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+ " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+ " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+ " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AF',\n",
+ " 'region_name': 'Africa',\n",
+ " 'country_iso_3s': [\n",
+ " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+ " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+ " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+ " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'EU',\n",
+ " 'region_name': 'Europe',\n",
+ " 'country_iso_3s': [\n",
+ " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+ " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+ " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+ " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+ " \"UZB\", \"VAT\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'SA',\n",
+ " 'region_name': 'Latin America & Caribbean',\n",
+ " 'country_iso_3s': [\n",
+ " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+ " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+ " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+ " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'PO',\n",
+ " 'region_name': 'Polar',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'NA',\n",
+ " 'region_name': 'North America',\n",
+ " 'country_iso_3s': [\n",
+ " \"CAN\", \"SPM\", \"USA\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'GL',\n",
+ " 'region_name': 'Global',\n",
+ " 'country_iso_3s': []\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'WA',\n",
+ " 'region_name': 'West Asia',\n",
+ " 'country_iso_3s': [\n",
+ " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+ " ]\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "# Convert the region data to a dictionary that maps each country to its region name\n",
+ "country_to_region = {}\n",
+ "for region in regions_data:\n",
+ " for country in region['country_iso_3s']:\n",
+ " country_to_region[country] = region['region_name']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " OBJECTID | \n",
+ " WDPAID | \n",
+ " WDPA_PID | \n",
+ " NAME | \n",
+ " English_De | \n",
+ " PARENT_ISO | \n",
+ " ISO3 | \n",
+ " MPA_Marine | \n",
+ " mpa_id | \n",
+ " Zone_Marin | \n",
+ " ... | \n",
+ " Distant_MP | \n",
+ " Level_of_P | \n",
+ " Most_Impac | \n",
+ " Descrip_Im | \n",
+ " Vertically | \n",
+ " SHAPE_Leng | \n",
+ " SHAPE_Area | \n",
+ " geometry | \n",
+ " P_LEVEL | \n",
+ " REGIONS | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1.0 | \n",
+ " 478053.0 | \n",
+ " 478053 | \n",
+ " Hikurangi Deep | \n",
+ " Benthic Protection Area | \n",
+ " NZL | \n",
+ " NZL | \n",
+ " 54022.1 | \n",
+ " 5258 | \n",
+ " 54022.1 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " Incompatible | \n",
+ " Mining, Fishing | \n",
+ " Benthic protections only. Deep sea mining allo... | \n",
+ " X | \n",
+ " 12.332952 | \n",
+ " 5.833001 | \n",
+ " POLYGON ((-175.00000 -42.16661, -175.00000 -42... | \n",
+ " Less Protected / Unknown | \n",
+ " Asia & Pacific | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2.0 | \n",
+ " 555512062.0 | \n",
+ " 555512062 | \n",
+ " Kermadec | \n",
+ " Benthic Protection Area | \n",
+ " NZL | \n",
+ " NZL | \n",
+ " 619146.0 | \n",
+ " 5428 | \n",
+ " 458540.5 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " Incompatible | \n",
+ " Mining, Fishing | \n",
+ " Benthic protections only. Deep sea mining allo... | \n",
+ " NaN | \n",
+ " 25.629352 | \n",
+ " 42.963159 | \n",
+ " POLYGON ((-174.02370 -29.22191, -174.02370 -29... | \n",
+ " Less Protected / Unknown | \n",
+ " Asia & Pacific | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 22 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " OBJECTID WDPAID WDPA_PID NAME English_De \\\n",
+ "0 1.0 478053.0 478053 Hikurangi Deep Benthic Protection Area \n",
+ "1 2.0 555512062.0 555512062 Kermadec Benthic Protection Area \n",
+ "\n",
+ " PARENT_ISO ISO3 MPA_Marine mpa_id Zone_Marin ... Distant_MP \\\n",
+ "0 NZL NZL 54022.1 5258 54022.1 ... NaN \n",
+ "1 NZL NZL 619146.0 5428 458540.5 ... NaN \n",
+ "\n",
+ " Level_of_P Most_Impac \\\n",
+ "0 Incompatible Mining, Fishing \n",
+ "1 Incompatible Mining, Fishing \n",
+ "\n",
+ " Descrip_Im Vertically SHAPE_Leng \\\n",
+ "0 Benthic protections only. Deep sea mining allo... X 12.332952 \n",
+ "1 Benthic protections only. Deep sea mining allo... NaN 25.629352 \n",
+ "\n",
+ " SHAPE_Area geometry \\\n",
+ "0 5.833001 POLYGON ((-175.00000 -42.16661, -175.00000 -42... \n",
+ "1 42.963159 POLYGON ((-174.02370 -29.22191, -174.02370 -29... \n",
+ "\n",
+ " P_LEVEL REGIONS \n",
+ "0 Less Protected / Unknown Asia & Pacific \n",
+ "1 Less Protected / Unknown Asia & Pacific \n",
+ "\n",
+ "[2 rows x 22 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mpatlas['REGIONS'] = mpatlas['ISO3'].map(country_to_region)\n",
+ "mpatlas.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " WDPAID | \n",
+ " WDPA_PID | \n",
+ " NAME | \n",
+ " AREA_MPATLAS | \n",
+ " DESIG_ENG | \n",
+ " ESTABLISHMENT | \n",
+ " IMPACT | \n",
+ " P_LEVEL | \n",
+ " PARENT_ISO | \n",
+ " ISO3 | \n",
+ " REGIONS | \n",
+ " geometry | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 478053.0 | \n",
+ " 478053 | \n",
+ " Hikurangi Deep | \n",
+ " 54022.1 | \n",
+ " Benthic Protection Area | \n",
+ " Implemented | \n",
+ " Mining, Fishing | \n",
+ " Less Protected / Unknown | \n",
+ " NZL | \n",
+ " NZL | \n",
+ " Asia & Pacific | \n",
+ " POLYGON ((-175.00000 -42.16661, -175.00000 -42... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 555512062.0 | \n",
+ " 555512062 | \n",
+ " Kermadec | \n",
+ " 458540.5 | \n",
+ " Benthic Protection Area | \n",
+ " Implemented | \n",
+ " Mining, Fishing | \n",
+ " Less Protected / Unknown | \n",
+ " NZL | \n",
+ " NZL | \n",
+ " Asia & Pacific | \n",
+ " POLYGON ((-174.02370 -29.22191, -174.02370 -29... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " WDPAID WDPA_PID NAME AREA_MPATLAS \\\n",
+ "0 478053.0 478053 Hikurangi Deep 54022.1 \n",
+ "1 555512062.0 555512062 Kermadec 458540.5 \n",
+ "\n",
+ " DESIG_ENG ESTABLISHMENT IMPACT \\\n",
+ "0 Benthic Protection Area Implemented Mining, Fishing \n",
+ "1 Benthic Protection Area Implemented Mining, Fishing \n",
+ "\n",
+ " P_LEVEL PARENT_ISO ISO3 REGIONS \\\n",
+ "0 Less Protected / Unknown NZL NZL Asia & Pacific \n",
+ "1 Less Protected / Unknown NZL NZL Asia & Pacific \n",
+ "\n",
+ " geometry \n",
+ "0 POLYGON ((-175.00000 -42.16661, -175.00000 -42... \n",
+ "1 POLYGON ((-174.02370 -29.22191, -174.02370 -29... "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Rename columns and keep only relevant ones. \n",
+ "# Note: We keep \"Zone_Marin\" (area of the geometry), instead of \"MPA_Marine\" (as MPAs can be divided into smaller pieces according to their protection levels)\n",
+ "\n",
+ "mpatlas = mpatlas.rename(columns={'English_De': 'DESIG_ENG', 'Zone_Marin': 'AREA_MPATLAS', 'Stage_of_E': 'ESTABLISHMENT', 'Most_Impac': 'IMPACT' }) \n",
+ "mpatlas2 = mpatlas[['WDPAID', 'WDPA_PID', 'NAME', 'AREA_MPATLAS', 'DESIG_ENG', 'ESTABLISHMENT', 'IMPACT', 'P_LEVEL', 'PARENT_ISO', 'ISO3','REGIONS', 'geometry']]\n",
+ "mpatlas2.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_25742/67511564.py:1: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n",
+ " mpatlas2.to_file(path_out + \"/mpatlas_table.shp\")\n"
+ ]
+ }
+ ],
+ "source": [
+ "mpatlas2.to_file(path_out + \"/mpatlas_table.shp\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " WDPAID | \n",
+ " WDPA_PID | \n",
+ " NAME | \n",
+ " AREA_MPATL | \n",
+ " DESIG_ENG | \n",
+ " ESTABLISHM | \n",
+ " IMPACT | \n",
+ " P_LEVEL | \n",
+ " PARENT_ISO | \n",
+ " ISO3 | \n",
+ " REGIONS | \n",
+ " geometry | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 478053.0 | \n",
+ " 478053 | \n",
+ " Hikurangi Deep | \n",
+ " 54022.1 | \n",
+ " Benthic Protection Area | \n",
+ " Implemented | \n",
+ " Mining, Fishing | \n",
+ " Less Protected / Unknown | \n",
+ " NZL | \n",
+ " NZL | \n",
+ " Asia & Pacific | \n",
+ " POLYGON ((-175.00000 -42.16661, -175.00000 -42... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 555512062.0 | \n",
+ " 555512062 | \n",
+ " Kermadec | \n",
+ " 458540.5 | \n",
+ " Benthic Protection Area | \n",
+ " Implemented | \n",
+ " Mining, Fishing | \n",
+ " Less Protected / Unknown | \n",
+ " NZL | \n",
+ " NZL | \n",
+ " Asia & Pacific | \n",
+ " POLYGON ((-174.02370 -29.22191, -174.02370 -29... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " WDPAID WDPA_PID NAME AREA_MPATL \\\n",
+ "0 478053.0 478053 Hikurangi Deep 54022.1 \n",
+ "1 555512062.0 555512062 Kermadec 458540.5 \n",
+ "\n",
+ " DESIG_ENG ESTABLISHM IMPACT \\\n",
+ "0 Benthic Protection Area Implemented Mining, Fishing \n",
+ "1 Benthic Protection Area Implemented Mining, Fishing \n",
+ "\n",
+ " P_LEVEL PARENT_ISO ISO3 REGIONS \\\n",
+ "0 Less Protected / Unknown NZL NZL Asia & Pacific \n",
+ "1 Less Protected / Unknown NZL NZL Asia & Pacific \n",
+ "\n",
+ " geometry \n",
+ "0 POLYGON ((-175.00000 -42.16661, -175.00000 -42... \n",
+ "1 POLYGON ((-174.02370 -29.22191, -174.02370 -29... "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mpatlas = gpd.read_file(path_out + \"/mpatlas_table.shp\")\n",
+ "mpatlas.head(2)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "skytruth",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/notebooks/layers.ipynb b/data/notebooks/layers.ipynb
new file mode 100644
index 00000000..3d9f2c16
--- /dev/null
+++ b/data/notebooks/layers.ipynb
@@ -0,0 +1,949 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import geopandas as gpd\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n",
+ "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MRGID | \n",
+ " GEONAME | \n",
+ " MRGID_TER1 | \n",
+ " POL_TYPE | \n",
+ " MRGID_SOV1 | \n",
+ " TERRITORY1 | \n",
+ " ISO_TER1 | \n",
+ " SOVEREIGN1 | \n",
+ " MRGID_TER2 | \n",
+ " MRGID_SOV2 | \n",
+ " ... | \n",
+ " ISO_SOV1 | \n",
+ " ISO_SOV2 | \n",
+ " ISO_SOV3 | \n",
+ " UN_SOV1 | \n",
+ " UN_SOV2 | \n",
+ " UN_SOV3 | \n",
+ " UN_TER1 | \n",
+ " UN_TER2 | \n",
+ " UN_TER3 | \n",
+ " geometry | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 8444.0 | \n",
+ " American Samoa Exclusive Economic Zone | \n",
+ " 8670.0 | \n",
+ " 200NM | \n",
+ " 2204.0 | \n",
+ " American Samoa | \n",
+ " ASM | \n",
+ " United States | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " USA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 840 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 16.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " POLYGON ((-166.64112 -17.55527, -166.64194 -17... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1 rows × 32 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MRGID GEONAME MRGID_TER1 POL_TYPE \\\n",
+ "0 8444.0 American Samoa Exclusive Economic Zone 8670.0 200NM \n",
+ "\n",
+ " MRGID_SOV1 TERRITORY1 ISO_TER1 SOVEREIGN1 MRGID_TER2 MRGID_SOV2 \\\n",
+ "0 2204.0 American Samoa ASM United States 0.0 0.0 \n",
+ "\n",
+ " ... ISO_SOV1 ISO_SOV2 ISO_SOV3 UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 UN_TER2 \\\n",
+ "0 ... USA NaN NaN 840 NaN NaN 16.0 NaN \n",
+ "\n",
+ " UN_TER3 geometry \n",
+ "0 NaN POLYGON ((-166.64112 -17.55527, -166.64194 -17... \n",
+ "\n",
+ "[1 rows x 32 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "eez = gpd.read_file(path_in + \"/World_EEZ_v11_20191118/eez_v11.shp\")\n",
+ "eez.head(1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['MRGID', 'GEONAME', 'MRGID_TER1', 'POL_TYPE', 'MRGID_SOV1',\n",
+ " 'TERRITORY1', 'ISO_TER1', 'SOVEREIGN1', 'MRGID_TER2', 'MRGID_SOV2',\n",
+ " 'TERRITORY2', 'ISO_TER2', 'SOVEREIGN2', 'MRGID_TER3', 'MRGID_SOV3',\n",
+ " 'TERRITORY3', 'ISO_TER3', 'SOVEREIGN3', 'X_1', 'Y_1', 'MRGID_EEZ',\n",
+ " 'AREA_KM2', 'ISO_SOV1', 'ISO_SOV2', 'ISO_SOV3', 'UN_SOV1', 'UN_SOV2',\n",
+ " 'UN_SOV3', 'UN_TER1', 'UN_TER2', 'UN_TER3', 'geometry'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "eez.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\n",
+ "Name: WGS 84\n",
+ "Axis Info [ellipsoidal]:\n",
+ "- Lat[north]: Geodetic latitude (degree)\n",
+ "- Lon[east]: Geodetic longitude (degree)\n",
+ "Area of Use:\n",
+ "- name: World.\n",
+ "- bounds: (-180.0, -90.0, 180.0, 90.0)\n",
+ "Datum: World Geodetic System 1984 ensemble\n",
+ "- Ellipsoid: WGS 84\n",
+ "- Prime Meridian: Greenwich"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "eez.crs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Check if eez geometries are valid\n",
+ "sum(eez.geometry.is_valid)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MRGID | \n",
+ " GEONAME | \n",
+ " MRGID_TER1 | \n",
+ " POL_TYPE | \n",
+ " MRGID_SOV1 | \n",
+ " TERRITORY1 | \n",
+ " ISO_TER1 | \n",
+ " SOVEREIGN1 | \n",
+ " MRGID_TER2 | \n",
+ " MRGID_SOV2 | \n",
+ " ... | \n",
+ " ISO_SOV1 | \n",
+ " ISO_SOV2 | \n",
+ " ISO_SOV3 | \n",
+ " UN_SOV1 | \n",
+ " UN_SOV2 | \n",
+ " UN_SOV3 | \n",
+ " UN_TER1 | \n",
+ " UN_TER2 | \n",
+ " UN_TER3 | \n",
+ " geometry | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 8444.0 | \n",
+ " American Samoa Exclusive Economic Zone | \n",
+ " 8670.0 | \n",
+ " 200NM | \n",
+ " 2204.0 | \n",
+ " American Samoa | \n",
+ " ASM | \n",
+ " United States | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " USA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 840 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 16.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " POLYGON ((-16216412.543 -2157569.856, -1621650... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 8379.0 | \n",
+ " Ascension Exclusive Economic Zone | \n",
+ " 8620.0 | \n",
+ " 200NM | \n",
+ " 2208.0 | \n",
+ " Ascension | \n",
+ " SHN | \n",
+ " United Kingdom | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " GBR | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 826 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 654.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " POLYGON ((-1089355.142 -974062.004, -1089348.4... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 8446.0 | \n",
+ " Cook Islands Exclusive Economic Zone | \n",
+ " 8672.0 | \n",
+ " 200NM | \n",
+ " 2227.0 | \n",
+ " Cook Islands | \n",
+ " COK | \n",
+ " New Zealand | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " NZL | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 554 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 184.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " POLYGON ((-15912583.852 -716733.193, -15813064... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 8389.0 | \n",
+ " Overlapping claim Falkland / Malvinas Islands:... | \n",
+ " 8623.0 | \n",
+ " Overlapping claim | \n",
+ " 2208.0 | \n",
+ " Falkland / Malvinas Islands | \n",
+ " FLK | \n",
+ " United Kingdom | \n",
+ " 8623.0 | \n",
+ " 2149.0 | \n",
+ " ... | \n",
+ " GBR | \n",
+ " ARG | \n",
+ " NaN | \n",
+ " 826 | \n",
+ " 32.0 | \n",
+ " NaN | \n",
+ " 238.0 | \n",
+ " 238.0 | \n",
+ " NaN | \n",
+ " POLYGON ((-4061728.309 -6509190.466, -4443979.... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 8440.0 | \n",
+ " French Polynesian Exclusive Economic Zone | \n",
+ " 8656.0 | \n",
+ " 200NM | \n",
+ " 17.0 | \n",
+ " French Polynesia | \n",
+ " PYF | \n",
+ " France | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " FRA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 250 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 258.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " MULTIPOLYGON (((-13543804.433 -974376.651, -13... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 32 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MRGID GEONAME MRGID_TER1 \\\n",
+ "0 8444.0 American Samoa Exclusive Economic Zone 8670.0 \n",
+ "1 8379.0 Ascension Exclusive Economic Zone 8620.0 \n",
+ "2 8446.0 Cook Islands Exclusive Economic Zone 8672.0 \n",
+ "3 8389.0 Overlapping claim Falkland / Malvinas Islands:... 8623.0 \n",
+ "4 8440.0 French Polynesian Exclusive Economic Zone 8656.0 \n",
+ "\n",
+ " POL_TYPE MRGID_SOV1 TERRITORY1 ISO_TER1 \\\n",
+ "0 200NM 2204.0 American Samoa ASM \n",
+ "1 200NM 2208.0 Ascension SHN \n",
+ "2 200NM 2227.0 Cook Islands COK \n",
+ "3 Overlapping claim 2208.0 Falkland / Malvinas Islands FLK \n",
+ "4 200NM 17.0 French Polynesia PYF \n",
+ "\n",
+ " SOVEREIGN1 MRGID_TER2 MRGID_SOV2 ... ISO_SOV1 ISO_SOV2 ISO_SOV3 \\\n",
+ "0 United States 0.0 0.0 ... USA NaN NaN \n",
+ "1 United Kingdom 0.0 0.0 ... GBR NaN NaN \n",
+ "2 New Zealand 0.0 0.0 ... NZL NaN NaN \n",
+ "3 United Kingdom 8623.0 2149.0 ... GBR ARG NaN \n",
+ "4 France 0.0 0.0 ... FRA NaN NaN \n",
+ "\n",
+ " UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 UN_TER2 UN_TER3 \\\n",
+ "0 840 NaN NaN 16.0 NaN NaN \n",
+ "1 826 NaN NaN 654.0 NaN NaN \n",
+ "2 554 NaN NaN 184.0 NaN NaN \n",
+ "3 826 32.0 NaN 238.0 238.0 NaN \n",
+ "4 250 NaN NaN 258.0 NaN NaN \n",
+ "\n",
+ " geometry \n",
+ "0 POLYGON ((-16216412.543 -2157569.856, -1621650... \n",
+ "1 POLYGON ((-1089355.142 -974062.004, -1089348.4... \n",
+ "2 POLYGON ((-15912583.852 -716733.193, -15813064... \n",
+ "3 POLYGON ((-4061728.309 -6509190.466, -4443979.... \n",
+ "4 MULTIPOLYGON (((-13543804.433 -974376.651, -13... \n",
+ "\n",
+ "[5 rows x 32 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "eez = eez.to_crs('ESRI:54009')\n",
+ "eez['AREA_KM2']= eez.geometry.area/ 1000000\n",
+ "eez.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eez.to_file(path_out + \"/administrative/eez_area_mollweide.shp\", driver=\"ESRI Shapefile\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Allocating 16 GB of heap memory\n",
+ "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp\n",
+ "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shx\n",
+ "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.dbf\n",
+ "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.prj\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Reproject to EPSG:4326\n",
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp -proj EPSG:4326 -o force /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_area_mollweide.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
+ "regions_data = [\n",
+ " {\n",
+ " 'region_iso': 'AS',\n",
+ " 'region_name': 'Asia & Pacific',\n",
+ " 'country_iso_3s': [\n",
+ " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+ " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+ " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+ " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AF',\n",
+ " 'region_name': 'Africa',\n",
+ " 'country_iso_3s': [\n",
+ " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+ " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+ " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+ " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'EU',\n",
+ " 'region_name': 'Europe',\n",
+ " 'country_iso_3s': [\n",
+ " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+ " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+ " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+ " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+ " \"UZB\", \"VAT\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'SA',\n",
+ " 'region_name': 'Latin America & Caribbean',\n",
+ " 'country_iso_3s': [\n",
+ " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+ " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+ " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+ " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'PO',\n",
+ " 'region_name': 'Polar',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'NA',\n",
+ " 'region_name': 'North America',\n",
+ " 'country_iso_3s': [\n",
+ " \"CAN\", \"SPM\", \"USA\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'GL',\n",
+ " 'region_name': 'Global',\n",
+ " 'country_iso_3s': []\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'WA',\n",
+ " 'region_name': 'West Asia',\n",
+ " 'country_iso_3s': [\n",
+ " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+ " ]\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "# Convert the region data to a dictionary that maps each country to its region name\n",
+ "country_to_region = {}\n",
+ "for region in regions_data:\n",
+ " for country in region['country_iso_3s']:\n",
+ " country_to_region[country] = region['region_name']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MRGID | \n",
+ " GEONAME | \n",
+ " MRGID_TER1 | \n",
+ " POL_TYPE | \n",
+ " MRGID_SOV1 | \n",
+ " TERRITORY1 | \n",
+ " ISO_TER1 | \n",
+ " SOVEREIGN1 | \n",
+ " MRGID_TER2 | \n",
+ " MRGID_SOV2 | \n",
+ " ... | \n",
+ " ISO_SOV2 | \n",
+ " ISO_SOV3 | \n",
+ " UN_SOV1 | \n",
+ " UN_SOV2 | \n",
+ " UN_SOV3 | \n",
+ " UN_TER1 | \n",
+ " UN_TER2 | \n",
+ " UN_TER3 | \n",
+ " geometry | \n",
+ " REGIONS | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 8444.0 | \n",
+ " American Samoa Exclusive Economic Zone | \n",
+ " 8670.0 | \n",
+ " 200NM | \n",
+ " 2204.0 | \n",
+ " American Samoa | \n",
+ " ASM | \n",
+ " United States | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 840 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 16.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " POLYGON ((-16216412.543 -2157569.856, -1621650... | \n",
+ " North America | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 8379.0 | \n",
+ " Ascension Exclusive Economic Zone | \n",
+ " 8620.0 | \n",
+ " 200NM | \n",
+ " 2208.0 | \n",
+ " Ascension | \n",
+ " SHN | \n",
+ " United Kingdom | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 826 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 654.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " POLYGON ((-1089355.142 -974062.004, -1089348.4... | \n",
+ " Europe | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 33 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MRGID GEONAME MRGID_TER1 POL_TYPE \\\n",
+ "0 8444.0 American Samoa Exclusive Economic Zone 8670.0 200NM \n",
+ "1 8379.0 Ascension Exclusive Economic Zone 8620.0 200NM \n",
+ "\n",
+ " MRGID_SOV1 TERRITORY1 ISO_TER1 SOVEREIGN1 MRGID_TER2 \\\n",
+ "0 2204.0 American Samoa ASM United States 0.0 \n",
+ "1 2208.0 Ascension SHN United Kingdom 0.0 \n",
+ "\n",
+ " MRGID_SOV2 ... ISO_SOV2 ISO_SOV3 UN_SOV1 UN_SOV2 UN_SOV3 UN_TER1 \\\n",
+ "0 0.0 ... NaN NaN 840 NaN NaN 16.0 \n",
+ "1 0.0 ... NaN NaN 826 NaN NaN 654.0 \n",
+ "\n",
+ " UN_TER2 UN_TER3 geometry \\\n",
+ "0 NaN NaN POLYGON ((-16216412.543 -2157569.856, -1621650... \n",
+ "1 NaN NaN POLYGON ((-1089355.142 -974062.004, -1089348.4... \n",
+ "\n",
+ " REGIONS \n",
+ "0 North America \n",
+ "1 Europe \n",
+ "\n",
+ "[2 rows x 33 columns]"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "eez['REGIONS'] = eez['ISO_SOV1'].map(country_to_region)\n",
+ "eez.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eez.to_file(path_out + \"/administrative/eez_mollweide_regions.shp\", driver=\"ESRI Shapefile\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['North America', 'Europe', 'Asia & Pacific',\n",
+ " 'Latin America & Caribbean', 'Africa', 'West Asia', nan],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "eez['REGIONS'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Allocating 16 GB of heap memory\n",
+ "[dissolve2] Removed 127,740 / 218,614 slivers using 0.033+ sqkm variable threshold\n",
+ "[dissolve2] Dissolved 281 features into 7 features\n",
+ "[explode] Exploded 7 features into 83 features\n",
+ "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp\n",
+ "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shx\n",
+ "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.dbf\n",
+ "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.prj\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Dissolve by relevant fields: REGIONS\n",
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_mollweide_regions.shp -dissolve2 fields=REGIONS -explode -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " REGIONS | \n",
+ " geometry | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " North America | \n",
+ " POLYGON ((-16216412.543 -2157569.856, -1621650... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " North America | \n",
+ " POLYGON ((-15875617.974 972834.674, -15887321.... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " REGIONS geometry\n",
+ "0 North America POLYGON ((-16216412.543 -2157569.856, -1621650...\n",
+ "1 North America POLYGON ((-15875617.974 972834.674, -15887321...."
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regions = gpd.read_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")\n",
+ "regions.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\n",
+ "Name: World_Mollweide\n",
+ "Axis Info [cartesian]:\n",
+ "- [east]: Easting (metre)\n",
+ "- [north]: Northing (metre)\n",
+ "Area of Use:\n",
+ "- undefined\n",
+ "Coordinate Operation:\n",
+ "- name: unnamed\n",
+ "- method: Mollweide\n",
+ "Datum: World Geodetic System 1984\n",
+ "- Ellipsoid: WGS 84\n",
+ "- Prime Meridian: Greenwich"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regions.crs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['North America', 'Europe', 'Asia & Pacific',\n",
+ " 'Latin America & Caribbean', 'Africa', 'West Asia', nan],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regions['REGIONS'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/98/0pdnjc5s29x2pnzl293pw7hr0000gn/T/ipykernel_27590/1686611470.py:1: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartic'\n"
+ ]
+ }
+ ],
+ "source": [
+ "regions['REGIONS'][regions['REGIONS'].isna()] = 'Antartica'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Calculate area of each region\n",
+ "regions['AREA_KM2']= regions.geometry.area/ 1000000"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "regions.to_file(path_out + \"/administrative/eez_regions.shp\", driver=\"ESRI Shapefile\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Allocating 16 GB of heap memory\n",
+ "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp\n",
+ "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shx\n",
+ "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.dbf\n",
+ "[o] Wrote /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.prj\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Reproject to EPSG:4326\n",
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp -proj EPSG:4326 -o force /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/administrative/eez_regions.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['North America', 'Europe', 'Asia & Pacific',\n",
+ " 'Latin America & Caribbean', 'Africa', 'West Asia', 'Antarctica'],\n",
+ " dtype=object)"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "regions['REGIONS'].unique()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "skytruth",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/notebooks/wdpa_coverage.ipynb b/data/notebooks/wdpa_coverage.ipynb
new file mode 100644
index 00000000..8b38f3bf
--- /dev/null
+++ b/data/notebooks/wdpa_coverage.ipynb
@@ -0,0 +1,1233 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Method used to calculate global protected areas and OECM coverage\n",
+ "\n",
+ "The protected area coverage is calculated as follows (adapted from the WDPA [methodology](https://www.protectedplanet.net/en/resources/calculating-protected-area-coverage)):\n",
+ "\n",
+ "1. Start with the latest WDPA monthly release\n",
+ "2. The WDPA is filtered to exclude records with the characteristics listed in Section 2\n",
+ "3. A buffer is created around protected areas reported as points using their Reported Area. There are important caveats associated with this method, some of which are explored by Visconti et al. 2013. Buffering points can underestimate or overestimate protected area coverage as the circles created around points might cover areas where protected areas do not exist (overestimation) or overlap with areas where other protected areas already exist (underestimation). It can also give inaccurate values for sites that are partly terrestrial and marine as the absence of boundaries make it difficult to predict which portion of a protected area is in the land or the sea.\n",
+ "4. Both polygon and buffered point layers are combined in a single layer\n",
+ "5. The layer above is flattened (dissolved) – to eliminate overlaps between designations and avoid double counting.\n",
+ "6. The global protected areas flat layer is intersected with a base map of the world (see Section 3)\n",
+ "7. The intersected flat layer is converted to Mollweide (an equal area projection) and the area of each polygon is calculated, in km2.\n",
+ "8. Calculated areas are summed by land, marine and Areas Beyond National Jurisdiction (ABNJ). Marine and coastal areas are those outlined in the Exclusive Economic Zones (EEZ) dataset (see Section 3 above). ABNJ constitute international waters outside the 200 nautical mile limits of national jurisdiction.\n",
+ "9. The terrestrial protected area coverage is calculated by dividing the total area of terrestrial protected areas by total global terrestrial area excluding Antarctica. ABNJ protected area coverage is calculated by selecting areas where ISO3 = 'ABNJ'. Marine and coastal protected area coverage is total global protected areas flat coverage - (ABNJ Area + Land Area)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Set up"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import geopandas as gpd\n",
+ "import pandas as pd\n",
+ "from datetime import datetime"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path_in = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/raw\"\n",
+ "path_out = \"/Users/sofia/Documents/Repos/skytruth_30x30/data/processed\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 1. Download and import last release of [MPA](https://www.protectedplanet.net/en/thematic-areas/marine-protected-areas): Sept 2023"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "poly1 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n",
+ "point1 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_0/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n",
+ "poly2 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n",
+ "point2 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_1/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n",
+ "poly3 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_2/WDPA_WDOECM_Sep2023_Public_marine_shp-polygons.shp\")\n",
+ "point3 = gpd.read_file(path_in + \"/WDPA_WDOECM_Sep2023_Public_marine_shp/WDPA_WDOECM_Sep2023_Public_marine_shp_2/WDPA_WDOECM_Sep2023_Public_marine_shp-points.shp\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(len(poly1))\n",
+ "print(len(point1))\n",
+ "print(len(poly2))\n",
+ "print(len(point2))\n",
+ "print(len(poly3))\n",
+ "print(len(point3))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2. Filter WDPA to exclude records:\n",
+ "- \"Not Reported\" protected areas (the methodology also recommends removing \"Proposed\" sites, but we keep them for future projections)\n",
+ "- MAB (Note: MAB sites reported as OECMs are included in coverage analyses)\n",
+ "- Sites submitted as points with no reported area"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataframes = [poly1, point1, poly2, point2, poly3, point3]\n",
+ "\n",
+ "for i, df in enumerate(dataframes):\n",
+ " # Remove rows where 'STATUS' is equal to 'Not Reported'\n",
+ " df = df[df['STATUS'] != 'Not Reported']\n",
+ " \n",
+ " # Remove rows where 'DESIG_ENG' contains 'MAB' (case-insensitive)\n",
+ " df = df[~df['DESIG_ENG'].str.contains('MAB', case=False)]\n",
+ " \n",
+ " # Check if the dataframe is one of point1, point2, or point3\n",
+ " if i in [1, 3, 5]:\n",
+ " # Remove rows where reported area is 0\n",
+ " df = df[(df['REP_AREA'] != 0)]\n",
+ " \n",
+ " # Update the original dataframes in the list\n",
+ " dataframes[i] = df\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(len(dataframes[0]))\n",
+ "print(len(dataframes[1]))\n",
+ "print(len(dataframes[2]))\n",
+ "print(len(dataframes[3]))\n",
+ "print(len(dataframes[4]))\n",
+ "print(len(dataframes[5]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3. Create buffers around points based on reported area"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Calculate radius based on REP_AREA\n",
+ "def calculate_radius(rep_area):\n",
+ " return (rep_area / 3.14159265358979323846) ** 0.5\n",
+ "\n",
+ "# Iterate through the list and process the desired dataframes\n",
+ "for idx in [1, 3, 5]:\n",
+ " # Get the dataframe at the specified index\n",
+ " gdf = dataframes[idx]\n",
+ "\n",
+ " # Reproject in Mollweide\n",
+ " gdf = gdf.to_crs('ESRI:54009')\n",
+ "\n",
+ " # Transform the reported area from square kilometers to square meters\n",
+ " gdf['REP_AREA_m'] = gdf['REP_AREA'] * 1000000\n",
+ "\n",
+ " # Create the \"radius\" column by applying the calculate_radius function to the \"REP_AREA_m\" column\n",
+ " gdf['radius'] = gdf['REP_AREA_m'].apply(calculate_radius)\n",
+ "\n",
+ " # Create buffers around the points using the \"radius\" column\n",
+ " gdf_buffered = gdf.copy()\n",
+ " gdf_buffered['geometry'] = gdf.apply(lambda row: row.geometry.buffer(row['radius']), axis=1)\n",
+ "\n",
+ " # Reproject back to WGS84\n",
+ " gdf_buffered = gdf_buffered.to_crs('EPSG:4326')\n",
+ "\n",
+ " # Remove rows with invalid geometries\n",
+ " gdf_buffered = gdf_buffered[gdf_buffered['geometry'].is_valid]\n",
+ " \n",
+ " # Update the original dataframe with the buffered data\n",
+ " dataframes[idx] = gdf_buffered"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4. Merge the 6 datasets (polygons and buffered points) in a single layer and segregate those that are \"Proposed\" and those that are OECM"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Check that all of them have the same crs\n",
+ "first_crs = dataframes[0].crs\n",
+ "same_crs = all(gdf.crs == first_crs for gdf in dataframes[1:])\n",
+ "if same_crs:\n",
+ " print(\"All gdf have the same crs:\", first_crs)\n",
+ "else:\n",
+ " print(\"gdf have different crs\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Check if geometries are valid\n",
+ "sum(merged_mpa.geometry.is_valid)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Save merged_mpa as a shapefile\n",
+ "merged_mpa.to_file(path_out + \"/wdpa/merged_mpa.shp\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Separate \"Proposed\"**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Select only the rows where 'STATUS' is equal to 'Proposed'\n",
+ "proposed = merged_mpa[merged_mpa['STATUS'] == 'Proposed']\n",
+ "\n",
+ "# Select only the rows where 'STATUS' is different from 'Proposed'\n",
+ "protected = merged_mpa[merged_mpa['STATUS'] != 'Proposed']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Save the two dataframes as shapefiles\n",
+ "proposed.to_file(path_out + \"/wdpa/proposed.shp\")\n",
+ "protected.to_file(path_out + \"/wdpa/protected.shp\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(len(proposed))\n",
+ "print(len(protected))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5. Dissolve intersecting polygons by relevant fields"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "protected.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2000' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2000.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2001' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2001.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2002' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2002.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2003' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2003.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2004' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2004.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2005' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2005.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2006' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2006.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2007' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2007.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2008' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2008.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2009' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2009.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2010' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2010.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2011' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2011.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2012' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2012.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2013' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2013.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2014' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2014.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2015' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2015.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2016' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2016.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2017' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2017.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2018' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2018.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2019' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2019.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2020' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2020.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2021' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2021.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2022' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2022.shp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!mapshaper-xl 16gb /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/protected.shp -filter 'STATUS_YR<=2023' -dissolve2 fields=PA_DEF, PARENT_ISO -explode -proj +proj=moll +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +type=crs -each AREA=this.area -o /Users/sofia/Documents/Repos/skytruth_30x30/data/processed/wdpa/timeseries/protected_dissolved_2023.shp"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Calculate coverage statistics"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Countries per PARENT_ISO**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "p2023 = gpd.read_file(path_out + \"/wdpa/timeseries/protected_dissolved_2023.shp\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['ATG', 'BRB', 'BRA', 'GBR', 'CHL', 'COL', 'CRI', 'DOM', 'ECU',\n",
+ " 'JAM', 'NLD', 'PER', 'PAN', 'SUR', 'VEN', 'USA', 'AUS', 'CAN',\n",
+ " 'FRA', 'IRN', 'JPN', 'KEN', 'KOR', 'MYS', 'MRT', 'MOZ', 'NOR',\n",
+ " 'PHL', 'POL', 'SAU', 'SEN', 'SWE', 'THA', 'TUN', 'CMR', 'IDN',\n",
+ " 'MUS', 'PRT', 'SYC', 'ISL', 'NZL', 'EST', 'GEO', 'UKR', 'MEX',\n",
+ " 'BHS', 'BLZ', 'GMB', 'MDG', 'HRV', 'FJI', 'LKA', 'ARG', 'ZAF',\n",
+ " 'PNG', 'TON', 'PLW', 'COK', 'BGD', 'AGO', 'ALB', 'DNK', 'ITA',\n",
+ " 'PAK', 'FIN', 'VNM', 'MMR', 'CHN', 'SGP', 'DEU', 'ROU', 'EGY',\n",
+ " 'SLB', 'VUT', 'BGR', 'MAR', 'MLT', 'DMA', 'LCA', 'OMN', 'GTM',\n",
+ " 'NIC', 'TTO', 'WSM', 'TZA', 'GRC', 'LBN', 'CUB', 'ISR', 'GRD',\n",
+ " 'VCT', 'BRN', 'ESP', 'JOR', 'ARE', 'HND', 'GNQ', 'KNA', 'LTU',\n",
+ " 'GNB', 'NGA', 'LVA', 'GUY', 'KAZ', 'BEL', 'GIN', 'IRL', 'RUS',\n",
+ " 'KHM', 'QAT', 'GAB', 'MDV', 'AZE', 'NAM', 'TUR', 'CPV', 'COG',\n",
+ " 'TUV', 'MCO', 'TKM', 'SVN', 'SLE', 'KIR', 'COM', 'NIU', 'FSM',\n",
+ " 'GHA', 'IOT', 'IND', 'LBR', 'CIV', 'SDN', 'SHN', 'SJM', 'UMI',\n",
+ " 'ATA', 'SYR', 'TLS', 'FRA;ITA;MCO', 'URY', 'ABNJ', 'NLD;DEU;DNK',\n",
+ " 'FIN;SWE', 'MHL', 'SLV', 'DZA', 'STP', 'YEM', 'COD', 'CYP', 'KWT',\n",
+ " 'HTI', 'MNE', 'BHR', 'LBY'], dtype=object)"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "p2023['PARENT_ISO'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "folder_path = path_out + '/wdpa/timeseries'\n",
+ "years_range = range(2000, 2024) # Range of years from 2000 to 2023\n",
+ "\n",
+ "# Create an empty list to store the results\n",
+ "results_list = []\n",
+ "\n",
+ "for year in years_range:\n",
+ " filename = f'protected_dissolved_{year}.shp'\n",
+ " file_path = os.path.join(folder_path, filename)\n",
+ " \n",
+ " if os.path.exists(file_path):\n",
+ " gdf = gpd.read_file(file_path)\n",
+ " grouped = gdf.groupby('PARENT_ISO')['AREA'].sum().reset_index()\n",
+ " \n",
+ " # Create columns\n",
+ " grouped['year'] = year\n",
+ " grouped['protection_type'] = 'MPA+OECM'\n",
+ " grouped.rename(columns={'PARENT_ISO': 'location_id', 'AREA': 'cumsum_area'}, inplace=True)\n",
+ " \n",
+ " # Append the result to the list\n",
+ " results_list.append(grouped)\n",
+ "\n",
+ "# Concatenate the list of results into a single DataFrame\n",
+ "final_df = pd.concat(results_list, ignore_index=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Convert area to km2\n",
+ "final_df['cumsum_area'] =final_df['cumsum_area']/1000000"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Global**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " cumsum_area | \n",
+ " year | \n",
+ " protection_type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ABNJ | \n",
+ " 594174.66 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AGO | \n",
+ " 0.42 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ALB | \n",
+ " 103.05 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ARE | \n",
+ " 78.52 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ARG | \n",
+ " 6155.67 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 3571 | \n",
+ " GLOB | \n",
+ " 28125365.96 | \n",
+ " 2019 | \n",
+ " MPA+OECM | \n",
+ "
\n",
+ " \n",
+ " 3572 | \n",
+ " GLOB | \n",
+ " 29624663.84 | \n",
+ " 2020 | \n",
+ " MPA+OECM | \n",
+ "
\n",
+ " \n",
+ " 3573 | \n",
+ " GLOB | \n",
+ " 29739178.77 | \n",
+ " 2021 | \n",
+ " MPA+OECM | \n",
+ "
\n",
+ " \n",
+ " 3574 | \n",
+ " GLOB | \n",
+ " 29910678.77 | \n",
+ " 2022 | \n",
+ " MPA+OECM | \n",
+ "
\n",
+ " \n",
+ " 3575 | \n",
+ " GLOB | \n",
+ " 29910724.21 | \n",
+ " 2023 | \n",
+ " MPA+OECM | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
3576 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " location_id cumsum_area year protection_type\n",
+ "0 ABNJ 594174.66 2000 MPA+OECM\n",
+ "1 AGO 0.42 2000 MPA+OECM\n",
+ "2 ALB 103.05 2000 MPA+OECM\n",
+ "3 ARE 78.52 2000 MPA+OECM\n",
+ "4 ARG 6155.67 2000 MPA+OECM\n",
+ "... ... ... ... ...\n",
+ "3571 GLOB 28125365.96 2019 MPA+OECM\n",
+ "3572 GLOB 29624663.84 2020 MPA+OECM\n",
+ "3573 GLOB 29739178.77 2021 MPA+OECM\n",
+ "3574 GLOB 29910678.77 2022 MPA+OECM\n",
+ "3575 GLOB 29910724.21 2023 MPA+OECM\n",
+ "\n",
+ "[3576 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Calculate global per year and append it\n",
+ "glob_df = final_df.groupby(['year', 'protection_type'])['cumsum_area'].sum().reset_index()\n",
+ "\n",
+ "glob_df['location_id'] = 'GLOB'\n",
+ "\n",
+ "final_df2 = pd.concat([final_df, glob_df], ignore_index=True)\n",
+ "final_df2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Reorder the columns and add column last updated\n",
+ "final_df2 = final_df2[['location_id', 'year', 'protection_type', 'cumsum_area']]\n",
+ "\n",
+ "current_date = datetime.now().strftime('%Y-%m-%d')\n",
+ "\n",
+ "final_df2 = final_df2.copy()\n",
+ "final_df2['last_updated'] = current_date"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Regions**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# List of dictionaries for data in Region_ISO3_PP.txt (list of regions used in the Protected Planet database)\n",
+ "regions_data = [\n",
+ " {\n",
+ " 'region_iso': 'AS',\n",
+ " 'region_name': 'Asia & Pacific',\n",
+ " 'country_iso_3s': [\n",
+ " \"AFG\", \"ASM\", \"AUS\", \"BGD\", \"BRN\", \"BTN\", \"CCK\", \"CHN\", \"COK\", \"CXR\", \"FJI\", \"FSM\", \"GUM\", \"HKG\", \"IDN\",\n",
+ " \"IND\", \"IOT\", \"IRN\", \"JPN\", \"KHM\", \"KIR\", \"KOR\", \"LAO\", \"LKA\", \"MAC\", \"MDV\", \"MHL\", \"MMR\", \"MNG\", \"MNP\",\n",
+ " \"MYS\", \"NCL\", \"NFK\", \"NIU\", \"NPL\", \"NRU\", \"NZL\", \"PAK\", \"PCN\", \"PHL\", \"PLW\", \"PNG\", \"PRK\", \"PYF\", \"SGP\",\n",
+ " \"SLB\", \"THA\", \"TKL\", \"TLS\", \"TON\", \"TUV\", \"TWN\", \"VNM\", \"VUT\", \"WLF\", \"WSM\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'AF',\n",
+ " 'region_name': 'Africa',\n",
+ " 'country_iso_3s': [\n",
+ " \"AGO\", \"BDI\", \"BEN\", \"BFA\", \"BWA\", \"CAF\", \"CIV\", \"CMR\", \"COD\", \"COG\", \"COM\", \"CPV\", \"DJI\", \"DZA\", \"EGY\",\n",
+ " \"ERI\", \"ESH\", \"ETH\", \"GAB\", \"GHA\", \"GIN\", \"GMB\", \"GNB\", \"GNQ\", \"KEN\", \"LBR\", \"LBY\", \"LSO\", \"MAR\", \"MDG\",\n",
+ " \"MLI\", \"MOZ\", \"MRT\", \"MUS\", \"MWI\", \"MYT\", \"NAM\", \"NER\", \"NGA\", \"REU\", \"RWA\", \"SDN\", \"SEN\", \"SHN\", \"SLE\",\n",
+ " \"SOM\", \"SSD\", \"STP\", \"SWZ\", \"SYC\", \"TCD\", \"TGO\", \"TUN\", \"TZA\", \"UGA\", \"ZAF\", \"ZMB\", \"ZWE\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'EU',\n",
+ " 'region_name': 'Europe',\n",
+ " 'country_iso_3s': [\n",
+ " \"ALA\", \"ALB\", \"AND\", \"ARM\", \"AUT\", \"AZE\", \"BEL\", \"BGR\", \"BIH\", \"BLR\", \"CHE\", \"CYP\", \"CZE\", \"DEU\", \"DNK\",\n",
+ " \"ESP\", \"EST\", \"FIN\", \"FRA\", \"FRO\", \"GBR\", \"GEO\", \"GGY\", \"GIB\", \"GRC\", \"HRV\", \"HUN\", \"IMN\", \"IRL\", \"ISL\",\n",
+ " \"ISR\", \"ITA\", \"JEY\", \"KAZ\", \"KGZ\", \"LIE\", \"LTU\", \"LUX\", \"LVA\", \"MCO\", \"MDA\", \"MKD\", \"MLT\", \"MNE\", \"NLD\",\n",
+ " \"NOR\", \"POL\", \"PRT\", \"ROU\", \"RUS\", \"SJM\", \"SMR\", \"SRB\", \"SVK\", \"SVN\", \"SWE\", \"TJK\", \"TKM\", \"TUR\", \"UKR\",\n",
+ " \"UZB\", \"VAT\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'SA',\n",
+ " 'region_name': 'Latin America & Caribbean',\n",
+ " 'country_iso_3s': [\n",
+ " \"ABW\", \"AIA\", \"ARG\", \"ATG\", \"BES\", \"BHS\", \"BLM\", \"BLZ\", \"BMU\", \"BOL\", \"BRA\", \"BRB\", \"CHL\", \"COL\", \"CRI\",\n",
+ " \"CUB\", \"CUW\", \"CYM\", \"DMA\", \"DOM\", \"ECU\", \"FLK\", \"GLP\", \"GRD\", \"GTM\", \"GUF\", \"GUY\", \"HND\", \"HTI\", \"JAM\",\n",
+ " \"KNA\", \"LCA\", \"MAF\", \"MEX\", \"MSR\", \"MTQ\", \"NIC\", \"PAN\", \"PER\", \"PRI\", \"PRY\", \"SLV\", \"SUR\", \"SXM\", \"TCA\",\n",
+ " \"TTO\", \"UMI\", \"URY\", \"VCT\", \"VEN\", \"VGB\", \"VIR\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'PO',\n",
+ " 'region_name': 'Polar',\n",
+ " 'country_iso_3s': [\n",
+ " \"ATF\", \"BVT\", \"GRL\", \"HMD\", \"SGS\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'NA',\n",
+ " 'region_name': 'North America',\n",
+ " 'country_iso_3s': [\n",
+ " \"CAN\", \"SPM\", \"USA\"\n",
+ " ]\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'GL',\n",
+ " 'region_name': 'Global',\n",
+ " 'country_iso_3s': []\n",
+ " },\n",
+ " {\n",
+ " 'region_iso': 'WA',\n",
+ " 'region_name': 'West Asia',\n",
+ " 'country_iso_3s': [\n",
+ " \"ARE\", \"BHR\", \"IRQ\", \"JOR\", \"KWT\", \"LBN\", \"OMN\", \"PSE\", \"QAT\", \"SAU\", \"SYR\", \"YEM\"\n",
+ " ]\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "# Convert the region data to a dictionary that maps each country to its region name\n",
+ "country_to_region = {}\n",
+ "for region in regions_data:\n",
+ " for country in region['country_iso_3s']:\n",
+ " country_to_region[country] = region['region_name']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " cumsum_area | \n",
+ " year | \n",
+ " protection_type | \n",
+ " region | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ABNJ | \n",
+ " 594174.66 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AGO | \n",
+ " 0.42 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ " Africa | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ALB | \n",
+ " 103.05 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ " Europe | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ARE | \n",
+ " 78.52 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ " West Asia | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ARG | \n",
+ " 6155.67 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ " Latin America & Caribbean | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 3547 | \n",
+ " VNM | \n",
+ " 5036.97 | \n",
+ " 2023 | \n",
+ " MPA+OECM | \n",
+ " Asia & Pacific | \n",
+ "
\n",
+ " \n",
+ " 3548 | \n",
+ " VUT | \n",
+ " 83.83 | \n",
+ " 2023 | \n",
+ " MPA+OECM | \n",
+ " Asia & Pacific | \n",
+ "
\n",
+ " \n",
+ " 3549 | \n",
+ " WSM | \n",
+ " 199.59 | \n",
+ " 2023 | \n",
+ " MPA+OECM | \n",
+ " Asia & Pacific | \n",
+ "
\n",
+ " \n",
+ " 3550 | \n",
+ " YEM | \n",
+ " 4108.19 | \n",
+ " 2023 | \n",
+ " MPA+OECM | \n",
+ " West Asia | \n",
+ "
\n",
+ " \n",
+ " 3551 | \n",
+ " ZAF | \n",
+ " 242387.88 | \n",
+ " 2023 | \n",
+ " MPA+OECM | \n",
+ " Africa | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
3552 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " location_id cumsum_area year protection_type region\n",
+ "0 ABNJ 594174.66 2000 MPA+OECM NaN\n",
+ "1 AGO 0.42 2000 MPA+OECM Africa\n",
+ "2 ALB 103.05 2000 MPA+OECM Europe\n",
+ "3 ARE 78.52 2000 MPA+OECM West Asia\n",
+ "4 ARG 6155.67 2000 MPA+OECM Latin America & Caribbean\n",
+ "... ... ... ... ... ...\n",
+ "3547 VNM 5036.97 2023 MPA+OECM Asia & Pacific\n",
+ "3548 VUT 83.83 2023 MPA+OECM Asia & Pacific\n",
+ "3549 WSM 199.59 2023 MPA+OECM Asia & Pacific\n",
+ "3550 YEM 4108.19 2023 MPA+OECM West Asia\n",
+ "3551 ZAF 242387.88 2023 MPA+OECM Africa\n",
+ "\n",
+ "[3552 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "final_df['region'] = final_df['location_id'].map(country_to_region)\n",
+ "final_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " location_id | \n",
+ " cumsum_area | \n",
+ " year | \n",
+ " protection_type | \n",
+ " region | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ABNJ | \n",
+ " 594174.66 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " ATA | \n",
+ " 3594.42 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " FIN;SWE | \n",
+ " 3541.14 | \n",
+ " 2000 | \n",
+ " MPA+OECM | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 131 | \n",
+ " ABNJ | \n",
+ " 594174.66 | \n",
+ " 2001 | \n",
+ " MPA+OECM | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 136 | \n",
+ " ATA | \n",
+ " 3594.42 | \n",
+ " 2001 | \n",
+ " MPA+OECM | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 3397 | \n",
+ " ABNJ | \n",
+ " 2811451.69 | \n",
+ " 2023 | \n",
+ " MPA+OECM | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3402 | \n",
+ " ATA | \n",
+ " 3570.36 | \n",
+ " 2023 | \n",
+ " MPA+OECM | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3439 | \n",
+ " FIN;SWE | \n",
+ " 3541.14 | \n",
+ " 2023 | \n",
+ " MPA+OECM | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3442 | \n",
+ " FRA;ITA;MCO | \n",
+ " 87742.14 | \n",
+ " 2023 | \n",
+ " MPA+OECM | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3502 | \n",
+ " NLD;DEU;DNK | \n",
+ " 11550.01 | \n",
+ " 2023 | \n",
+ " MPA+OECM | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
110 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " location_id cumsum_area year protection_type region\n",
+ "0 ABNJ 594174.66 2000 MPA+OECM NaN\n",
+ "5 ATA 3594.42 2000 MPA+OECM NaN\n",
+ "37 FIN;SWE 3541.14 2000 MPA+OECM NaN\n",
+ "131 ABNJ 594174.66 2001 MPA+OECM NaN\n",
+ "136 ATA 3594.42 2001 MPA+OECM NaN\n",
+ "... ... ... ... ... ...\n",
+ "3397 ABNJ 2811451.69 2023 MPA+OECM NaN\n",
+ "3402 ATA 3570.36 2023 MPA+OECM NaN\n",
+ "3439 FIN;SWE 3541.14 2023 MPA+OECM NaN\n",
+ "3442 FRA;ITA;MCO 87742.14 2023 MPA+OECM NaN\n",
+ "3502 NLD;DEU;DNK 11550.01 2023 MPA+OECM NaN\n",
+ "\n",
+ "[110 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "final_df[final_df.region.isnull()]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['ABNJ', 'AGO', 'ALB', 'ARE', 'ARG', 'ATA', 'ATG', 'AUS', 'AZE',\n",
+ " 'BEL', 'BGD', 'BGR', 'BHR', 'BHS', 'BLZ', 'BRA', 'BRB', 'BRN',\n",
+ " 'CAN', 'CHL', 'CHN', 'COD', 'COG', 'COK', 'COL', 'CRI', 'CUB',\n",
+ " 'CYP', 'DEU', 'DMA', 'DNK', 'DOM', 'ECU', 'EGY', 'ESP', 'EST',\n",
+ " 'FIN', 'FIN;SWE', 'FJI', 'FRA', 'FSM', 'GBR', 'GEO', 'GHA', 'GIN',\n",
+ " 'GMB', 'GNB', 'GNQ', 'GRC', 'GRD', 'GTM', 'HND', 'HRV', 'IDN',\n",
+ " 'IRL', 'IRN', 'ISL', 'ISR', 'ITA', 'JAM', 'JPN', 'KAZ', 'KEN',\n",
+ " 'KHM', 'KIR', 'KNA', 'KOR', 'LBN', 'LBY', 'LCA', 'LKA', 'LTU',\n",
+ " 'LVA', 'MAR', 'MCO', 'MDG', 'MDV', 'MEX', 'MHL', 'MLT', 'MMR',\n",
+ " 'MNE', 'MOZ', 'MRT', 'MUS', 'MYS', 'NAM', 'NGA', 'NIC', 'NIU',\n",
+ " 'NLD', 'NOR', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW',\n",
+ " 'PNG', 'POL', 'PRT', 'ROU', 'RUS', 'SAU', 'SDN', 'SEN', 'SLB',\n",
+ " 'SLE', 'SUR', 'SVN', 'SWE', 'SYC', 'SYR', 'THA', 'TKM', 'TLS',\n",
+ " 'TON', 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UKR', 'USA', 'VCT',\n",
+ " 'VEN', 'VNM', 'VUT', 'WSM', 'ZAF', 'DZA', 'FRA;ITA;MCO', 'IOT',\n",
+ " 'GAB', 'IND', 'SGP', 'LBR', 'CIV', 'CPV', 'SLV', 'QAT', 'STP',\n",
+ " 'SHN', 'YEM', 'NLD;DEU;DNK', 'URY', 'CMR', 'COM', 'KWT', 'SJM',\n",
+ " 'GUY', 'UMI', 'HTI', 'JOR'], dtype=object)"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "final_df['location_id'].unique() "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "final_df2.to_csv(path_out + '/tables/protected_area_coverage.csv', index=False)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "skytruth",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/pyproject.toml b/data/pyproject.toml
new file mode 100644
index 00000000..e10a202c
--- /dev/null
+++ b/data/pyproject.toml
@@ -0,0 +1,10 @@
+[tool.black]
+line-length = 100
+
+[tool.isort]
+profile = "black"
+
+[tool.ruff]
+select = ["E", "F", "N"]
+line-length = 100
+ignore = []
diff --git a/data/src/__init__.py b/data/src/__init__.py
new file mode 100644
index 00000000..e69de29b