diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index 13045d1..0000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,2 +0,0 @@ -FROM mcr.microsoft.com/devcontainers/python:0-3.10 -RUN pip install --upgrade pip \ No newline at end of file diff --git a/.github/requirements.txt b/.github/requirements.txt new file mode 100644 index 0000000..9928a46 --- /dev/null +++ b/.github/requirements.txt @@ -0,0 +1,20 @@ +earthengine-api==0.1.408 +geocube==0.4.2 +geopandas==0.14.1 +rioxarray==0.15.0 +odc-stac==0.3.8 +pystac-client==0.7.5 +pytest==7.4.3 +xarray-spatial==0.3.7 +xee==0.0.3 +utm==0.7.0 +osmnx==1.9.3 +dask[complete]==2023.11.0 +matplotlib==3.8.2 +s3fs==2024.5.0 +geemap==0.32.0 +pip==23.3.1 +boto3==1.34.124 +scikit-learn==1.5.0 +overturemaps==0.6.0 +git+https://github.com/isciences/exactextract \ No newline at end of file diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml new file mode 100644 index 0000000..60c4d63 --- /dev/null +++ b/.github/workflows/build-image.yml @@ -0,0 +1,39 @@ +name: build-image +on: + workflow_dispatch: +jobs: + build-image: + name: build-image + runs-on: ubuntu-22.04 + steps: + - name: Clean up Ubuntu + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Get VERSION + run: echo "VERSION=$(cat VERSION)" >> $GITHUB_ENV + - name: Build Image + id: build-image + uses: redhat-actions/buildah-build@v2 + with: + image: wri-cities-cif-environment + tags: latest ${{ env.VERSION }} ${{ github.sha }} + containerfiles: | + ./container/Containerfile + - name: Push image to container registry + id: push-image-to-registry + uses: redhat-actions/push-to-registry@v2 + with: + image: ${{ steps.build-image.outputs.image }} + tags: ${{ steps.build-image.outputs.tags }} + registry: ghcr.io/wri + username: ${{ 
secrets.REGISTRY_USER }} + password: ${{ secrets.REGISTRY_PASSWORD }} + - name: Print image url + run: echo "Image pushed to ${{ steps.push-image-to-registry.outputs.registry-paths }}" diff --git a/.github/workflows/dev_ci_cd.yml b/.github/workflows/dev_ci_cd.yml new file mode 100644 index 0000000..6d55444 --- /dev/null +++ b/.github/workflows/dev_ci_cd.yml @@ -0,0 +1,36 @@ +name: Dev CIF API CI/CD + +on: + pull_request: + +permissions: + contents: read +jobs: + build: + runs-on: ubuntu-latest + strategy: + max-parallel: 4 + matrix: + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install Linux dependencies + run: | + sudo apt update + sudo apt install -y gdal-bin libgdal-dev + - name: Install Packages + run: | + python -m pip install --upgrade pip + pip install -r .github/requirements.txt + pip install GDAL==`gdal-config --version` + - name: Run Tests + env: + GOOGLE_APPLICATION_USER: ${{ secrets.GOOGLE_APPLICATION_USER }} + GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} + run: | + pytest tests diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml deleted file mode 100644 index 372668c..0000000 --- a/.github/workflows/tests.yml +++ /dev/null @@ -1,34 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python - -name: Python application - -on: - push: - branches: [ "main" ] - pull_request: - branches: [ "main" ] - -permissions: - contents: read - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.10 - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - name: Install dependencies - run: | - python -m pip install 
--upgrade pip - pip install pytest - python setup.py install - - name: Test with pytest - run: | - cd tests/ - pytest layers.py metrics.py diff --git a/README.md b/README.md index 3808c6c..6b23e83 100644 --- a/README.md +++ b/README.md @@ -3,45 +3,61 @@ The Cities Indicator Framework (CIF) is a set of Python tools to make it easier to calculate zonal statistics for cities by providing a standardized set of data layers for inputs and a common framework for using those layers to calculate indicators. ## Quick start + * If all you want to do is use the CIF, the quickest way to get started is to use our [WRI Cities Indicator Framework Colab Notebook](https://colab.research.google.com/drive/1PV1H-godxJ6h42p74Ij9sdFh3T0RN-7j#scrollTo=eM14UgpmpZL-) ## Installation +* `pip install git+https://github.com/wri/cities-cif@v0.1.2` to install a specific version. * `pip install git+https://github.com/wri/cities-cif/releases/latest` gives you the latest stable release. * `pip install git+https://github.com/wri/cities-cif` gives you the main branch with is not stable. ## PR Review -0. Prerequisites - 1. Git - * On Windows I recommend WSL https://learn.microsoft.com/en-us/windows/wsl/tutorials/wsl-git - 3. https://cli.github.com/ - * On MacOS I recommend the Homebrew option - * If you don't have an ssh key, it will install one for you - 4. Conda (or Mamba) to install dependencies - * If you have Homebrew `brew install --cask miniconda` +0. Prerequisites +1. Git + * On Windows I recommend WSL [https://learn.microsoft.com/en-us/windows/wsl/tutorials/wsl-git](https://learn.microsoft.com/en-us/windows/wsl/tutorials/wsl-git) +2. [https://cli.github.com/](https://cli.github.com/) + * On MacOS I recommend the Homebrew option + * If you don't have an ssh key, it will install one for you +3. Conda (or Mamba) to install dependencies + * If you have Homebrew `brew install --cask miniconda` ## Dependencies + +There are 2 ways to install dependencies. Choose one... 
+ ### Conda + `conda env create -f environment.yml` +### Setuptools + +`python setup.py` +NOTE: If you are using this method you may want to use something like pyenv to manage Python environments + ## Credentials -To run the module, you need access to Google Earth Engine. + +To run the module, + + 1. You need access to Google Earth Engine + 2. Install ### Interactive development + For most people working in a notebook or IDE the script should walk you thourgh an interactive authentication process. You will just need to be logged in to your Google account that has access to GEE in your browser. ### Programatic access + If you have issues with this or need to run the script as part of an automated workflow we have a GEE-enabled GCP service account that can be used. Get in touch with Saif or Chris to ask about getting the credetials. Set the following environment variables: -- GOOGLE_APPLICATION_CREDENTIALS: The path of GCP credentials JSON file containing your private key. -- GOOGLE_APPLICATION_USER: The email for your GCP user. -- GCS_BUCKET: The GCS bucket to read and write data from. + +* GOOGLE_APPLICATION_CREDENTIALS: The path of GCP credentials JSON file containing your private key. +* GOOGLE_APPLICATION_USER: The email for your GCP user. For example, you could set the following in your `~/.zshrc` file: ``` -export GCS_BUCKET=gee-exports export GOOGLE_APPLICATION_USER=developers@citiesindicators.iam.gserviceaccount.com export GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials/file ``` @@ -50,5 +66,6 @@ export GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials/file All are welcome to contribute by creating a [Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests). We try to follow the [Github Flow](https://docs.github.com/en/get-started/quickstart/github-flow) workflow. 
-See the [developer docs](docs/developer.md) to learn more about how to add data layers and indicators. +See [PR Review](docs/pr_review.md) for more details and options on how to review a PR. +See the [developer docs](docs/developer.md) to learn more about how to add data layers and indicators. diff --git a/city_metrix/__init__.py b/city_metrix/__init__.py index bbda6db..49cf44f 100644 --- a/city_metrix/__init__.py +++ b/city_metrix/__init__.py @@ -1,26 +1,37 @@ -from .metrics import * import os -import ee import warnings +import ee + +from .metrics import * + # initialize ee -if "GOOGLE_APPLICATION_CREDENTIALS" in os.environ and "GOOGLE_APPLICATION_USER" in os.environ: +if ( + "GOOGLE_APPLICATION_CREDENTIALS" in os.environ + and "GOOGLE_APPLICATION_USER" in os.environ +): print("Authenticating to GEE with configured credentials file.") CREDENTIAL_FILE = os.environ["GOOGLE_APPLICATION_CREDENTIALS"] GEE_SERVICE_ACCOUNT = os.environ["GOOGLE_APPLICATION_USER"] - auth = ee.ServiceAccountCredentials(GEE_SERVICE_ACCOUNT, CREDENTIAL_FILE) - ee.Initialize(auth, opt_url='https://earthengine-highvolume.googleapis.com') + if CREDENTIAL_FILE.endswith(".json"): + auth = ee.ServiceAccountCredentials( + GEE_SERVICE_ACCOUNT, key_file=CREDENTIAL_FILE + ) + else: + auth = ee.ServiceAccountCredentials( + GEE_SERVICE_ACCOUNT, key_data=CREDENTIAL_FILE + ) + ee.Initialize(auth, opt_url="https://earthengine-highvolume.googleapis.com") else: print("Could not find GEE credentials file, so prompting authentication.") ee.Authenticate() - ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com') + ee.Initialize(opt_url="https://earthengine-highvolume.googleapis.com") # set for AWS requests os.environ["AWS_REQUEST_PAYER"] = "requester" # disable warning messages -warnings.filterwarnings('ignore', module='xee') -warnings.filterwarnings('ignore', module='dask') -warnings.filterwarnings('ignore', module='xarray') - +warnings.filterwarnings("ignore", module="xee") 
+warnings.filterwarnings("ignore", module="dask") +warnings.filterwarnings("ignore", module="xarray") diff --git a/city_metrix/layers/__init__.py b/city_metrix/layers/__init__.py index ea7fa8a..669e727 100644 --- a/city_metrix/layers/__init__.py +++ b/city_metrix/layers/__init__.py @@ -1,4 +1,5 @@ from .albedo import Albedo +from .ndvi_sentinel2_gee import NdviSentinel2 from .esa_world_cover import EsaWorldCover, EsaWorldCoverClass from .land_surface_temperature import LandSurfaceTemperature from .tree_cover import TreeCover diff --git a/city_metrix/layers/albedo.py b/city_metrix/layers/albedo.py index dd3ba8a..7bf7b11 100644 --- a/city_metrix/layers/albedo.py +++ b/city_metrix/layers/albedo.py @@ -13,7 +13,7 @@ def __init__(self, start_date="2021-01-01", end_date="2022-01-01", threshold=Non self.threshold = threshold def get_data(self, bbox): - S2 = ee.ImageCollection("COPERNICUS/S2_SR") + S2 = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED") S2C = ee.ImageCollection("COPERNICUS/S2_CLOUD_PROBABILITY") MAX_CLOUD_PROB = 30 diff --git a/city_metrix/layers/high_land_surface_temperature.py b/city_metrix/layers/high_land_surface_temperature.py index 610651b..d3943e9 100644 --- a/city_metrix/layers/high_land_surface_temperature.py +++ b/city_metrix/layers/high_land_surface_temperature.py @@ -54,8 +54,3 @@ def addDate(image): # convert to date object return datetime.datetime.strptime(hottest_date, "%Y%m%d").date() - - def write(self, output_path): - self.data.rio.to_raster(output_path) - - diff --git a/city_metrix/layers/land_surface_temperature.py b/city_metrix/layers/land_surface_temperature.py index 0c87f30..7fff632 100644 --- a/city_metrix/layers/land_surface_temperature.py +++ b/city_metrix/layers/land_surface_temperature.py @@ -33,8 +33,3 @@ def apply_scale_factors(image): data = get_image_collection(ee.ImageCollection(l8_st), bbox, 30, "LST").ST_B10_mean return data - - def write(self, output_path): - self.data.rio.to_raster(output_path) - - diff --git 
a/city_metrix/layers/landsat_collection_2.py b/city_metrix/layers/landsat_collection_2.py index d82180d..248227a 100644 --- a/city_metrix/layers/landsat_collection_2.py +++ b/city_metrix/layers/landsat_collection_2.py @@ -29,8 +29,7 @@ def get_data(self, bbox): fail_on_error=False, ) + # TODO: Determine how to output xarray + qa_lst = lc2.where((lc2.qa_pixel & 24) == 0) return qa_lst.drop_vars("qa_pixel") - - - diff --git a/city_metrix/layers/layer.py b/city_metrix/layers/layer.py index 299b0a1..01ad6e4 100644 --- a/city_metrix/layers/layer.py +++ b/city_metrix/layers/layer.py @@ -18,10 +18,8 @@ import shapely.geometry as geometry import pandas as pd - MAX_TILE_SIZE = 0.5 - class Layer: def __init__(self, aggregate=None, masks=[]): self.aggregate = aggregate @@ -56,7 +54,7 @@ def groupby(self, zones, layer=None): """ return LayerGroupBy(self.aggregate, zones, layer, self.masks) - def write(self, bbox, output_path, tile_degrees=None): + def write(self, bbox, output_path, tile_degrees=None, **kwargs): """ Write the layer to a path. Does not apply masks. 
@@ -301,21 +299,23 @@ def get_image_collection( return data - def write_layer(path, data): if isinstance(data, xr.DataArray): - # for rasters, need to write to locally first then copy to cloud storage - if path.startswith("s3://"): - tmp_path = f"{uuid4()}.tif" - data.rio.to_raster(raster_path=tmp_path, driver="COG") - - s3 = boto3.client('s3') - s3.upload_file(tmp_path, path.split('/')[2], '/'.join(path.split('/')[3:])) - - os.remove(tmp_path) - else: - data.rio.to_raster(raster_path=path, driver="COG") + write_dataarray(path, data) elif isinstance(data, gpd.GeoDataFrame): data.to_file(path, driver="GeoJSON") else: - raise NotImplementedError("Can only write DataArray or GeoDataFrame") + raise NotImplementedError("Can only write DataArray, Dataset, or GeoDataFrame") + +def write_dataarray(path, data): + # for rasters, need to write to locally first then copy to cloud storage + if path.startswith("s3://"): + tmp_path = f"{uuid4()}.tif" + data.rio.to_raster(raster_path=tmp_path, driver="COG") + + s3 = boto3.client('s3') + s3.upload_file(tmp_path, path.split('/')[2], '/'.join(path.split('/')[3:])) + + os.remove(tmp_path) + else: + data.rio.to_raster(raster_path=path, driver="COG") diff --git a/city_metrix/layers/ndvi_sentinel2_gee.py b/city_metrix/layers/ndvi_sentinel2_gee.py new file mode 100644 index 0000000..c5b21b9 --- /dev/null +++ b/city_metrix/layers/ndvi_sentinel2_gee.py @@ -0,0 +1,46 @@ +import ee +from .layer import Layer, get_image_collection + +class NdviSentinel2(Layer): + """" + NDVI = Sentinel-2 Normalized Difference Vegetation Index + param: year: The satellite imaging year. 
+ return: a rioxarray-format DataArray + Author of associated Jupyter notebook: Ted.Wong@wri.org + Notebook: https://github.com/wri/cities-cities4forests-indicators/blob/dev-eric/scripts/extract-VegetationCover.ipynb + Reference: https://en.wikipedia.org/wiki/Normalized_difference_vegetation_index + """ + def __init__(self, year=None, **kwargs): + super().__init__(**kwargs) + self.year = year + + def get_data(self, bbox): + if self.year is None: + raise Exception('NdviSentinel2.get_data() requires a year value') + + start_date = "%s-01-01" % self.year + end_date = "%s-12-31" % self.year + + # Compute NDVI for each image + def calculate_ndvi(image): + ndvi = (image + .normalizedDifference(['B8', 'B4']) + .rename('NDVI')) + return image.addBands(ndvi) + + s2 = ee.ImageCollection("COPERNICUS/S2_HARMONIZED") + ndvi = (s2 + .filterBounds(ee.Geometry.BBox(*bbox)) + .filterDate(start_date, end_date) + .map(calculate_ndvi) + .select('NDVI') + ) + + ndvi_mosaic = ndvi.qualityMosaic('NDVI') + + ic = ee.ImageCollection(ndvi_mosaic) + ndvi_data = get_image_collection(ic, bbox, 10, "NDVI") + + xdata = ndvi_data.to_dataarray() + + return xdata diff --git a/city_metrix/layers/sentinel_2_level_2.py b/city_metrix/layers/sentinel_2_level_2.py index a7ae944..a609293 100644 --- a/city_metrix/layers/sentinel_2_level_2.py +++ b/city_metrix/layers/sentinel_2_level_2.py @@ -50,4 +50,6 @@ def get_data(self, bbox): cloud_masked = s2.where(s2 != 0).where(s2.scl != 3).where(s2.scl != 8).where(s2.scl != 9).where( s2.scl != 10) + # TODO: Determine how to output as an xarray + return cloud_masked.drop_vars("scl") diff --git a/city_metrix/layers/smart_surface_lulc.py b/city_metrix/layers/smart_surface_lulc.py index 171bca5..d58f468 100644 --- a/city_metrix/layers/smart_surface_lulc.py +++ b/city_metrix/layers/smart_surface_lulc.py @@ -7,6 +7,7 @@ import psutil from exactextract import exact_extract import pickle +import importlib.resources as pkg_resources import warnings 
warnings.filterwarnings('ignore',category=UserWarning) @@ -24,7 +25,7 @@ def get_data(self, bbox): crs = get_utm_zone_epsg(bbox) # load building roof slope classifier - with open('city_metrix/models/building_classifier/building_classifier.pkl', 'rb') as f: + with pkg_resources.files('city_metrix.models.building_classifier').joinpath('building_classifier.pkl').open('rb') as f: clf = pickle.load(f) # ESA world cover diff --git a/city_metrix/models/__init__.py b/city_metrix/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/city_metrix/models/building_classifier/__init__.py b/city_metrix/models/building_classifier/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/container/Containerfile b/container/Containerfile new file mode 100644 index 0000000..2ad0631 --- /dev/null +++ b/container/Containerfile @@ -0,0 +1,2 @@ +FROM ghcr.io/wri/wri-data-science-environment:latest +LABEL maintainer="Chris Rowe" \ No newline at end of file diff --git a/container/VERSION b/container/VERSION new file mode 100644 index 0000000..ceab6e1 --- /dev/null +++ b/container/VERSION @@ -0,0 +1 @@ +0.1 \ No newline at end of file diff --git a/docs/developer.md b/docs/developer.md index e273e9f..c6b1855 100644 --- a/docs/developer.md +++ b/docs/developer.md @@ -11,7 +11,7 @@ The `city_metrix` library allows users of geospatial data to collect and apply z It provides two main functionalities: 1. Extracting geospatial `layers` based on specific areas of interests (defined as geodataframe)' These data layers are collected from any cloud source (Google Earth Engine, AWS S3 public buckets, Public APIs). Two formats of data layers are handled in `city_metrix`: Rasters and vectors. - Rasters data are collected and transformed into _arrays_ using `xarray` (GEE images collections are converted also into `arrays` using `xee`). - - Vectors adata are stored as `GeoDataFrame`. + - Vectors data are stored as `GeoDataFrame`. 2. 
Measuring `indicators` using the extracted `layers` by implementing zonal statistics operations The main package source code is located in the `city_metrix` directory. @@ -81,9 +81,11 @@ Hopefully we already have the layers you need in `city_metrix/layers/` and you c 5. Add a test to [tests/layers.py](../tests/layers.py) to ensure the new layer is working as expected. -6. Add a section to the get layers.ipynb notebook to demonstrate how to use the new layer. +6. Add new dependencies to [setup.py](../setup.py) and [environment.yml](../environment.yml). -7. Create a PR to merge the new layer into the main branch with these in the PR description: +7. Add a section to the get layers.ipynb notebook to demonstrate how to use the new layer. + +8. Create a PR to merge the new layer into the main branch with these in the PR description: - Link to Jira ticket (if any) - A brief description of the new layer - A link to the Airtable record for the new layer @@ -101,6 +103,16 @@ Once you have all the data layers you need as inputs, here is the process to cre Define a function for new indicator with the input of the calculation zones as a `GeoDataFrame` and output of the calculated indicators as a `GeoSeries`. +3. Add a test to [tests/metrics.py](../tests/metrics.py) to ensure the new indicator is working as expected. + +4. Add new dependencies to [setup.py](../setup.py) and [environment.yml](../environment.yml). + +5. 
Create a PR to merge the new indicator into the main branch with these in the PR description: + - Link to Jira ticket (if any) + - A brief description of the new indicator + - A link to the Airtable record for the new indicator + - Explain how to test the new indicator + ## Adding Cities You can always have users just provide their own boudary files, but if you are working on a project where you want to provide access to a common set of city boundaries, the best option is to add them to the API diff --git a/docs/pr_review.md b/docs/pr_review.md new file mode 100644 index 0000000..9cd4456 --- /dev/null +++ b/docs/pr_review.md @@ -0,0 +1,26 @@ +# Pull Request Review + +There are 4 options for doing code review +## Github +1. Go to https://github.com/wri/cities-cif/pulls and find the PR you want to review +2. Use Github's web based interface to review the code changes, make inline comments, and submit the review. See https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/reviewing-changes-in-pull-requests/reviewing-proposed-changes-in-a-pull-request for more details. + +NOTE: This does not allow you to run the code, so is not the preferred approach, especially for more complicated changes. + +## Colab +1. Open a new Colab notebook at https://colab.new/ +2. Install the package from the branch you want to review with `!pip install git+https://github.com/wri/cities-cif@[branch-name]` replacing `[branch-name]` with the branch you want to review. e.g. `!pip install git+https://github.com/wri/cities-cif@feature/max_dsm` for the `feature/max_dsm` branch. https://github.com/wri/cities-cif/tree/feature/max_dsm +3. Run the code to test that it works as expected +4. Use the Github PR UI (as described above) to submit your review. + +See https://drive.google.com/drive/u/0/folders/1W5VxN_7_WdjnX64SgI5SxAnJW9hhigA_ for examples + +## Codespace +In progress. + +## Local +1. Check out the branch locally +2. Run the code to test that it works as expected +3. 
Use the Github PR UI (as described above) to submit your review. + + diff --git a/environment.yml b/environment.yml index 18f95f4..24ec040 100644 --- a/environment.yml +++ b/environment.yml @@ -13,7 +13,7 @@ dependencies: - xarray-spatial=0.3.7 - xee=0.0.3 - utm=0.7.0 - - osmnx=1.8.1 + - osmnx=1.9.0 - dask[complete]=2023.11.0 - matplotlib=3.8.2 - jupyterlab=4.0.10 @@ -23,6 +23,5 @@ dependencies: - boto3=1.34.124 - scikit-learn=1.5.0 - pip: - - cartoframes==1.2.5 - - git+https://github.com/isciences/exactextract - - overturemaps==0.6.0 + - git+https://github.com/isciences/exactextract + - overturemaps==0.6.0 diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..de19c9f --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +testpaths = tests \ No newline at end of file diff --git a/setup.py b/setup.py index c2d24d8..3124621 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,16 @@ -from setuptools import setup +from setuptools import setup, find_packages setup( name="city_metrix", - version="0.1.0", + version="0.1.2", description="Module to calculate various metrics on cities.", - packages=["city_metrix", "city_metrix.layers", "city_metrix.metrics"], + packages=find_packages(), + include_package_data=True, + package_data={ + 'city_metrix.models.building_classifier': [ + 'building_classifier.pkl', + ], + }, author="Justin Terry", license="MIT", install_requires=[ @@ -16,10 +22,14 @@ "xarray-spatial", "xee", "rioxarray", - "cartoframes", "utm", "osmnx", "geopandas", "s3fs", + "dask>=2023.11.0", + "boto3", + "exactextract", + "overturemaps", + "scikit-learn>=1.5.0", ], ) diff --git a/tests/layers.py b/tests/layers.py deleted file mode 100644 index 620ee89..0000000 --- a/tests/layers.py +++ /dev/null @@ -1,122 +0,0 @@ -import ee - -from city_metrix.layers import LandsatCollection2, Albedo, LandSurfaceTemperature, EsaWorldCover, EsaWorldCoverClass, TreeCover, AverageNetBuildingHeight, OpenStreetMap, OpenStreetMapClass, UrbanLandUse, OpenBuildings, 
TreeCanopyHeight, AlosDSM, SmartSurfaceLULC, OvertureBuildings, NasaDEM -from city_metrix.layers.layer import get_image_collection -from .conftest import MockLayer, MockMaskLayer, ZONES, LARGE_ZONES, MockLargeLayer, MockGroupByLayer, \ - MockLargeGroupByLayer - -import pytest -import numpy as np - - -def test_count(): - counts = MockLayer().groupby(ZONES).count() - assert counts.size == 100 - assert all([count == 100 for count in counts]) - - -def test_mean(): - means = MockLayer().groupby(ZONES).mean() - assert means.size == 100 - assert all([mean == i for i, mean in enumerate(means)]) - - -def test_fishnetted_count(): - counts = MockLargeLayer().groupby(LARGE_ZONES).count() - assert counts.size == 100 - assert all([count == 100 for count in counts]) - - -def test_fishnetted_mean(): - means = MockLargeLayer().groupby(LARGE_ZONES).mean() - assert means.size == 100 - assert all([mean == i for i, mean in enumerate(means)]) - - -def test_masks(): - counts = MockLayer().mask(MockMaskLayer()).groupby(ZONES).count() - assert counts.size == 100 - for i, count in enumerate(counts): - if i % 2 == 0: - assert np.isnan(count) - else: - assert count == 100 - - -def test_group_by_layer(): - counts = MockLayer().groupby(ZONES, layer=MockGroupByLayer()).count() - assert all([count == {1: 50.0, 2: 50.0} for count in counts]) - - -def test_group_by_large_layer(): - counts = MockLargeLayer().groupby(LARGE_ZONES, layer=MockLargeGroupByLayer()).count() - assert all([count == {1: 50.0, 2: 50.0} for count in counts]) - - -SAMPLE_BBOX = (-38.35530428121955, -12.821710300686393, -38.33813814352424, -12.80363249765361) - - -def test_read_image_collection(): - ic = ee.ImageCollection("ESA/WorldCover/v100") - data = get_image_collection(ic, SAMPLE_BBOX, 10, "test") - - assert data.rio.crs == 32724 - assert data.dims == {'x': 187, 'y': 200} - - -def test_read_image_collection_scale(): - ic = ee.ImageCollection("ESA/WorldCover/v100") - data = get_image_collection(ic, SAMPLE_BBOX, 100, "test") 
- assert data.dims == {'x': 19, 'y': 20} - - -def test_tree_cover(): - assert pytest.approx(53.84184165912419, rel=0.001) == TreeCover().get_data(SAMPLE_BBOX).mean() - - -def test_albedo(): - assert Albedo().get_data(SAMPLE_BBOX).mean() - - -def test_lst(): - mean = LandSurfaceTemperature().get_data(SAMPLE_BBOX).mean() - assert mean - - -def test_esa(): - count = EsaWorldCover(land_cover_class=EsaWorldCoverClass.BUILT_UP).get_data(SAMPLE_BBOX).count() - assert count - -def test_average_net_building_height(): - assert AverageNetBuildingHeight().get_data(SAMPLE_BBOX).mean() - -def test_open_street_map(): - count = OpenStreetMap(osm_class=OpenStreetMapClass.ROAD).get_data(SAMPLE_BBOX).count().sum() - assert count - -def test_urban_land_use(): - assert UrbanLandUse().get_data(SAMPLE_BBOX).count() - -def test_openbuildings(): - count = OpenBuildings().get_data(SAMPLE_BBOX).count().sum() - assert count - -def test_tree_canopy_hight(): - count = TreeCanopyHeight().get_data(SAMPLE_BBOX).count() - assert count - -def test_alos_dsm(): - mean = AlosDSM().get_data(SAMPLE_BBOX).mean() - assert mean - -def test_smart_surface_lulc(): - count = SmartSurfaceLULC().get_data(SAMPLE_BBOX).count() - assert count - -def test_overture_buildings(): - count = OvertureBuildings().get_data(SAMPLE_BBOX).count().sum() - assert count - -def test_nasa_dem(): - mean = NasaDEM().get_data(SAMPLE_BBOX).mean() - assert mean diff --git a/tests/resources/__init__.py b/tests/resources/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/resources/bbox_constants.py b/tests/resources/bbox_constants.py new file mode 100644 index 0000000..9aaf8ad --- /dev/null +++ b/tests/resources/bbox_constants.py @@ -0,0 +1,22 @@ +# File defines bboxes using in the test code + + +BBOX_BRA_LAURO_DE_FREITAS_1 = ( + -38.35530428121955, + -12.821710300686393, + -38.33813814352424, + -12.80363249765361, +) + +BBOX_BRA_SALVADOR_ADM4 = ( + -38.647320153390055, + -13.01748678217598787, + 
-38.3041637148564007, + -12.75607703449720631 +) + +BBOX_SMALL_TEST = ( + -38.43864,-12.97987, + -38.39993,-12.93239 +) + diff --git a/tests/resources/layer_dumps_for_br_lauro_de_freitas/README.md b/tests/resources/layer_dumps_for_br_lauro_de_freitas/README.md new file mode 100644 index 0000000..d4dc944 --- /dev/null +++ b/tests/resources/layer_dumps_for_br_lauro_de_freitas/README.md @@ -0,0 +1,4 @@ +# QGIS manual analysis for Lauro de Freitas, Brazil +Folder contains: +1. Test code that can be set to output the layers as geotiff files. Execution is controlled by a "master switch" +1. A QGIS file used for manually inspecting the generated geotiff files diff --git a/tests/resources/layer_dumps_for_br_lauro_de_freitas/__init__.py b/tests/resources/layer_dumps_for_br_lauro_de_freitas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/resources/layer_dumps_for_br_lauro_de_freitas/conftest.py b/tests/resources/layer_dumps_for_br_lauro_de_freitas/conftest.py new file mode 100644 index 0000000..5882053 --- /dev/null +++ b/tests/resources/layer_dumps_for_br_lauro_de_freitas/conftest.py @@ -0,0 +1,67 @@ +import tempfile +import pytest +import os +import shutil +from collections import namedtuple + +from tests.resources.bbox_constants import BBOX_BRA_LAURO_DE_FREITAS_1 +from tools.general_tools import create_target_folder, is_valid_path + +# RUN_DUMPS is the master control for whether the writes and tests are executed +# Setting RUN_DUMPS to True turns on code execution. +# Values should normally be set to False in order to avoid unnecessary execution. +RUN_DUMPS = False + +# Specify None to write to a temporary default folder otherwise specify a valid custom target path. +CUSTOM_DUMP_DIRECTORY = None + +# Both the tests and QGIS file are implemented for the same bounding box in Brazil. 
+COUNTRY_CODE_FOR_BBOX = 'BRA' +BBOX = BBOX_BRA_LAURO_DE_FREITAS_1 + +def pytest_configure(config): + qgis_project_file = 'layers_for_br_lauro_de_freitas.qgz' + + source_folder = os.path.dirname(__file__) + target_folder = get_target_folder_path() + create_target_folder(target_folder, True) + + source_qgis_file = os.path.join(source_folder, qgis_project_file) + target_qgis_file = os.path.join(target_folder, qgis_project_file) + shutil.copyfile(source_qgis_file, target_qgis_file) + + print("\n\033[93m QGIS project file and layer files written to folder %s.\033[0m\n" % target_folder) + +@pytest.fixture +def target_folder(): + return get_target_folder_path() + +@pytest.fixture +def bbox_info(): + bbox = namedtuple('bbox', ['bounds', 'country']) + bbox_instance = bbox(bounds=BBOX, country=COUNTRY_CODE_FOR_BBOX) + return bbox_instance + +def get_target_folder_path(): + if CUSTOM_DUMP_DIRECTORY is not None: + if is_valid_path(CUSTOM_DUMP_DIRECTORY) is False: + raise ValueError(f"The custom path '%s' is not valid. Stopping." 
% CUSTOM_DUMP_DIRECTORY) + else: + output_dir = CUSTOM_DUMP_DIRECTORY + else: + sub_directory_name = 'test_result_tif_files' + scratch_dir_name = tempfile.TemporaryDirectory(ignore_cleanup_errors=True).name + dir_path = os.path.dirname(scratch_dir_name) + output_dir = os.path.join(dir_path, sub_directory_name) + + return output_dir + +def prep_output_path(output_folder, file_name): + file_path = os.path.join(output_folder, file_name) + if os.path.isfile(file_path): + os.remove(file_path) + return file_path + +def verify_file_is_populated(file_path): + is_populated = True if os.path.getsize(file_path) > 0 else False + return is_populated diff --git a/tests/resources/layer_dumps_for_br_lauro_de_freitas/layers_for_br_lauro_de_freitas.qgz b/tests/resources/layer_dumps_for_br_lauro_de_freitas/layers_for_br_lauro_de_freitas.qgz new file mode 100644 index 0000000..759515e Binary files /dev/null and b/tests/resources/layer_dumps_for_br_lauro_de_freitas/layers_for_br_lauro_de_freitas.qgz differ diff --git a/tests/resources/layer_dumps_for_br_lauro_de_freitas/test_write_layers_to_qgis_files.py b/tests/resources/layer_dumps_for_br_lauro_de_freitas/test_write_layers_to_qgis_files.py new file mode 100644 index 0000000..5e0efb9 --- /dev/null +++ b/tests/resources/layer_dumps_for_br_lauro_de_freitas/test_write_layers_to_qgis_files.py @@ -0,0 +1,145 @@ +# This code is mostly intended for manual execution +# Execution configuration is specified in the conftest file +import pytest + +from city_metrix.layers import ( + Albedo, + AlosDSM, + AverageNetBuildingHeight, + EsaWorldCover, + HighLandSurfaceTemperature, + LandsatCollection2, + LandSurfaceTemperature, + NasaDEM, + NaturalAreas, + OpenBuildings, + OpenStreetMap, + OvertureBuildings, + Sentinel2Level2, + NdviSentinel2, + SmartSurfaceLULC, + TreeCanopyHeight, + TreeCover, + UrbanLandUse, + WorldPop, Layer +) +from .conftest import RUN_DUMPS, prep_output_path, verify_file_is_populated + +@pytest.mark.skipif(RUN_DUMPS == False, 
reason='Skipping since RUN_DUMPS set to False') +def test_write_albedo(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'albedo.tif') + Albedo().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_alos_dsm(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'alos_dsm.tif') + AlosDSM().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_average_net_building_height(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'average_net_building_height.tif') + AverageNetBuildingHeight().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_esa_world_cover(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'esa_world_cover.tif') + EsaWorldCover().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_high_land_surface_temperature(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'high_land_surface_temperature.tif') + HighLandSurfaceTemperature().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_land_surface_temperature(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'land_surface_temperature.tif') + LandSurfaceTemperature().write(bbox_info.bounds, file_path, tile_degrees=None) + assert 
verify_file_is_populated(file_path) + +# TODO Class is no longer used, but may be useful later +# @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +# def test_write_landsat_collection_2(target_folder, bbox_info): +# file_path = prep_output_path(target_folder, 'landsat_collection2.tif') +# bands = ['green'] +# LandsatCollection2(bands).write(bbox_info.bounds, file_path, tile_degrees=None) +# assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_nasa_dem(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'nasa_dem.tif') + NasaDEM().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_natural_areas(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'natural_areas.tif') + NaturalAreas().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_ndvi_sentinel2_gee(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'ndvi_sentinel2_gee.tif') + NdviSentinel2(year=2023).write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_openbuildings(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'open_buildings.tif') + OpenBuildings(bbox_info.country).write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +# TODO Class write is not functional. Is class still needed or have we switched to overture? 
+# @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +# def test_write_open_street_map(target_folder, bbox_info): +# file_path = prep_output_path(target_folder, 'open_street_map.tif') +# OpenStreetMap().write(bbox_info.bounds, file_path, tile_degrees=None) +# assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_overture_buildings(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'overture_buildings.tif') + OvertureBuildings().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +# TODO Class is no longer used, but may be useful later +# @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +# def test_write_sentinel_2_level2(target_folder, bbox_info): +# file_path = prep_output_path(target_folder, 'sentinel_2_level2.tif') +# sentinel_2_bands = ["green"] +# Sentinel2Level2(sentinel_2_bands).write(bbox_info.bounds, file_path, tile_degrees=None) +# assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_smart_surface_lulc(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'smart_surface_lulc.tif') + SmartSurfaceLULC().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_tree_canopy_height(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'tree_canopy_height.tif') + TreeCanopyHeight().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_tree_cover(target_folder, bbox_info): + file_path = 
prep_output_path(target_folder, 'tree_cover.tif') + TreeCover().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_urban_land_use(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'urban_land_use.tif') + UrbanLandUse().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) + +@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') +def test_write_world_pop(target_folder, bbox_info): + file_path = prep_output_path(target_folder, 'world_pop.tif') + WorldPop().write(bbox_info.bounds, file_path, tile_degrees=None) + assert verify_file_is_populated(file_path) diff --git a/tests/test_layer_dimensions.py b/tests/test_layer_dimensions.py new file mode 100644 index 0000000..15768d6 --- /dev/null +++ b/tests/test_layer_dimensions.py @@ -0,0 +1,18 @@ +from city_metrix.layers import NdviSentinel2 +from tests.resources.bbox_constants import BBOX_BRA_LAURO_DE_FREITAS_1 +from tests.tools import post_process_layer + +COUNTRY_CODE_FOR_BBOX = 'BRA' +BBOX = BBOX_BRA_LAURO_DE_FREITAS_1 + +def test_ndvi_dimensions(): + data = NdviSentinel2(year=2023).get_data(BBOX) + data_for_map = post_process_layer(data, value_threshold=0.4, convert_to_percentage=True) + + expected_min = 0 + actual_min = data_for_map.values.min() + expected_max = 85 + actual_max = data_for_map.values.max() + + assert actual_min == expected_min + assert actual_max == expected_max diff --git a/tests/test_layers.py b/tests/test_layers.py new file mode 100644 index 0000000..bfcf4a0 --- /dev/null +++ b/tests/test_layers.py @@ -0,0 +1,161 @@ +import ee +import pytest + +from city_metrix.layers import ( + Albedo, + AlosDSM, + AverageNetBuildingHeight, + NdviSentinel2, + EsaWorldCover, + EsaWorldCoverClass, + HighLandSurfaceTemperature, + LandsatCollection2, + 
LandSurfaceTemperature,
+    NasaDEM,
+    NaturalAreas,
+    OpenBuildings,
+    OpenStreetMap,
+    OpenStreetMapClass,
+    OvertureBuildings,
+    Sentinel2Level2,
+    SmartSurfaceLULC,
+    TreeCanopyHeight,
+    TreeCover,
+    UrbanLandUse,
+    WorldPop
+)
+from city_metrix.layers.layer import get_image_collection
+from tests.resources.bbox_constants import BBOX_BRA_LAURO_DE_FREITAS_1
+
+EE_IMAGE_DIMENSION_TOLERANCE = 1 # Tolerance compensates for variable results from GEE service
+# Tests are implemented for the same bounding box in Brazil.
+COUNTRY_CODE_FOR_BBOX = 'BRA'
+BBOX = BBOX_BRA_LAURO_DE_FREITAS_1
+
+def test_albedo():
+    assert Albedo().get_data(BBOX).mean()
+
+
+def test_alos_dsm():
+    mean = AlosDSM().get_data(BBOX).mean()
+    assert mean
+
+
+def test_average_net_building_height():
+    assert AverageNetBuildingHeight().get_data(BBOX).mean()
+
+
+def test_esa_world_cover():
+    count = (
+        EsaWorldCover(land_cover_class=EsaWorldCoverClass.BUILT_UP)
+        .get_data(BBOX)
+        .count()
+    )
+    assert count
+
+
+def test_read_image_collection():
+    ic = ee.ImageCollection("ESA/WorldCover/v100")
+    data = get_image_collection(ic, BBOX, 10, "test")
+
+    expected_crs = 32724
+    expected_x_dimension = 187
+    expected_y_dimension = 199
+
+    assert data.rio.crs == expected_crs
+    assert (
+        pytest.approx(expected_x_dimension, rel=EE_IMAGE_DIMENSION_TOLERANCE) == data.dims["x"]
+        and pytest.approx(expected_y_dimension, rel=EE_IMAGE_DIMENSION_TOLERANCE) == data.dims["y"]
+    )
+
+
+def test_read_image_collection_scale():
+    ic = ee.ImageCollection("ESA/WorldCover/v100")
+    data = get_image_collection(ic, BBOX, 100, "test")
+    expected_x_dimension = 19
+    expected_y_dimension = 20
+    assert data.dims == {"x": expected_x_dimension, "y": expected_y_dimension}
+
+
+def test_high_land_surface_temperature():
+    data = HighLandSurfaceTemperature().get_data(BBOX)
+    assert data.any()
+
+
+def test_land_surface_temperature():
+    mean_lst = LandSurfaceTemperature().get_data(BBOX).mean()
+    assert mean_lst
+
+
+@pytest.mark.skip(reason="layer is deprecated")
+def test_landsat_collection_2(): + bands = ["blue"] + data = LandsatCollection2(bands).get_data(BBOX) + assert data.any() + + +def test_nasa_dem(): + mean = NasaDEM().get_data(BBOX).mean() + assert mean + + +def test_natural_areas(): + data = NaturalAreas().get_data(BBOX) + assert data.any() + +def test_ndvi_sentinel2(): + data = NdviSentinel2(year=2023).get_data(BBOX) + assert data is not None + + +def test_openbuildings(): + count = OpenBuildings(COUNTRY_CODE_FOR_BBOX).get_data(BBOX).count().sum() + assert count + + +def test_open_street_map(): + count = ( + OpenStreetMap(osm_class=OpenStreetMapClass.ROAD) + .get_data(BBOX) + .count() + .sum() + ) + assert count + + +def test_overture_buildings(): + count = OvertureBuildings().get_data(BBOX).count().sum() + assert count + + +@pytest.mark.skip(reason="layer is deprecated") +def test_sentinel_2_level2(): + sentinel_2_bands = ["green"] + data = Sentinel2Level2(sentinel_2_bands).get_data(BBOX) + assert data.any() + + +def test_smart_surface_lulc(): + count = SmartSurfaceLULC().get_data(BBOX).count() + assert count + +def test_tree_canopy_height(): + count = TreeCanopyHeight().get_data(BBOX).count() + assert count + +def test_tree_cover(): + actual = TreeCover().get_data(BBOX).mean() + expected = 54.0 + tolerance = 0.1 + assert ( + pytest.approx(expected, rel=tolerance) == actual + ) + + +def test_urban_land_use(): + assert UrbanLandUse().get_data(BBOX).count() + + +def test_world_pop(): + data = WorldPop().get_data(BBOX) + assert data.any() diff --git a/tests/test_methods.py b/tests/test_methods.py new file mode 100644 index 0000000..176f305 --- /dev/null +++ b/tests/test_methods.py @@ -0,0 +1,57 @@ +import numpy as np + +from .conftest import ( + LARGE_ZONES, + ZONES, + MockGroupByLayer, + MockLargeGroupByLayer, + MockLargeLayer, + MockLayer, + MockMaskLayer, +) + + +def test_count(): + counts = MockLayer().groupby(ZONES).count() + assert counts.size == 100 + assert all([count == 100 for count in counts]) + + +def 
test_mean(): + means = MockLayer().groupby(ZONES).mean() + assert means.size == 100 + assert all([mean == i for i, mean in enumerate(means)]) + + +def test_fishnetted_count(): + counts = MockLargeLayer().groupby(LARGE_ZONES).count() + assert counts.size == 100 + assert all([count == 100 for count in counts]) + + +def test_fishnetted_mean(): + means = MockLargeLayer().groupby(LARGE_ZONES).mean() + assert means.size == 100 + assert all([mean == i for i, mean in enumerate(means)]) + + +def test_masks(): + counts = MockLayer().mask(MockMaskLayer()).groupby(ZONES).count() + assert counts.size == 100 + for i, count in enumerate(counts): + if i % 2 == 0: + assert np.isnan(count) + else: + assert count == 100 + + +def test_group_by_layer(): + counts = MockLayer().groupby(ZONES, layer=MockGroupByLayer()).count() + assert all([count == {1: 50.0, 2: 50.0} for count in counts]) + + +def test_group_by_large_layer(): + counts = ( + MockLargeLayer().groupby(LARGE_ZONES, layer=MockLargeGroupByLayer()).count() + ) + assert all([count == {1: 50.0, 2: 50.0} for count in counts]) diff --git a/tests/metrics.py b/tests/test_metrics.py similarity index 100% rename from tests/metrics.py rename to tests/test_metrics.py diff --git a/tests/tools.py b/tests/tools.py new file mode 100644 index 0000000..99425df --- /dev/null +++ b/tests/tools.py @@ -0,0 +1,35 @@ +import numpy as np + +def post_process_layer(data, value_threshold=0.4, convert_to_percentage=True): + """ + Applies the standard post-processing adjustment used for rendering of NDVI including masking + to a threshold and conversion to percentage values. 
+ :param value_threshold: (float) minimum threshold for keeping values + :param convert_to_percentage: (bool) controls whether NDVI values are converted to a percentage + :return: A rioxarray-format DataArray + """ + # Remove values less than the specified threshold + if value_threshold is not None: + data = data.where(data >= value_threshold) + + # Convert to percentage in byte data_type + if convert_to_percentage is True: + data = convert_ratio_to_percentage(data) + + return data + +def convert_ratio_to_percentage(data): + """ + Converts xarray variable from a ratio to a percentage + :param data: (xarray) xarray to be converted + :return: A rioxarray-format DataArray + """ + + # convert to percentage and to bytes for efficient storage + values_as_percent = np.round(data * 100).astype(np.uint8) + + # reset CRS + source_crs = data.rio.crs + values_as_percent.rio.write_crs(source_crs, inplace=True) + + return values_as_percent diff --git a/tools/general_tools.py b/tools/general_tools.py new file mode 100644 index 0000000..b38d1b5 --- /dev/null +++ b/tools/general_tools.py @@ -0,0 +1,23 @@ +import os +import tempfile + + +def is_valid_path(path: str): + return os.path.exists(path) + +def create_target_folder(folder_path, delete_existing_files: bool): + if os.path.isdir(folder_path) is False: + os.makedirs(folder_path) + elif delete_existing_files is True: + remove_all_files_in_directory(folder_path) + +def remove_all_files_in_directory(directory): + # Iterate over all the files in the directory + for filename in os.listdir(directory): + file_path = os.path.join(directory, filename) + try: + # Check if it is a file and remove it + if os.path.isfile(file_path): + os.remove(file_path) + except Exception as e: + print(f"Error: {e}") \ No newline at end of file