diff --git a/data/.dockerignore b/data/.dockerignore
deleted file mode 100644
index 999402d10..000000000
--- a/data/.dockerignore
+++ /dev/null
@@ -1,9 +0,0 @@
-.gitignore
-.github
-.editorconfig
-.env.example
-.pre-commit-config.yaml
-data
-tests
-notebooks
-docs
diff --git a/data/.pre-commit-config.yaml b/data/.pre-commit-config.yaml
index 545404f46..8aa55e294 100644
--- a/data/.pre-commit-config.yaml
+++ b/data/.pre-commit-config.yaml
@@ -1,6 +1,36 @@
 repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v2.3.0
+    hooks:
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.0.275
+    hooks:
+      - id: ruff
+        args: [ "--line-length=100", "--select=E,F,N"]
+
   - repo: https://github.com/psf/black
-    rev: 23.1.0
+    rev: 23.3.0
     hooks:
       - id: black
-        args: ["--line-length=120"]
+        args: [ "--line-length=100" ]
+
+  - repo: https://github.com/nbQA-dev/nbQA
+    rev: 1.7.0
+    hooks:
+      - id: nbqa-black
+        args: [ "--line-length=100"]
+      - id: nbqa-isort
+        args: [ "--float-to-top", "--profile=black"]
+      - id: nbqa-ruff
+        args: [ "--line-length=100" , "--select=E,F,N", "--fix"]
+
+  # check for private keys and passwords!
+  - repo: https://github.com/gitleaks/gitleaks
+    rev: v8.17.0
+    hooks:
+      - id: gitleaks-docker
+
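The hooks added above run client-side through pre-commit. A minimal usage sketch, assuming pre-commit is available via pip and that this config is selected explicitly with -c, since it lives under data/ rather than at the repository root (hook ids are taken from the config above):

    pip install pre-commit
    pre-commit install -c data/.pre-commit-config.yaml
    pre-commit run --all-files -c data/.pre-commit-config.yaml
    pre-commit run nbqa-black --all-files -c data/.pre-commit-config.yaml

The gitleaks-docker hook runs gitleaks from its container image, so it additionally assumes a working Docker daemon.
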
diff --git a/data/Dockerfile b/data/importers/Dockerfile
similarity index 87%
rename from data/Dockerfile
rename to data/importers/Dockerfile
index 029c0e46c..639608660 100644
--- a/data/Dockerfile
+++ b/data/importers/Dockerfile
@@ -21,7 +21,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2
 
 RUN pip install -q --upgrade --no-cache-dir pip
 
-COPY ./requirements.txt requirements.txt
+COPY requirements.txt requirements.txt
 RUN pip install -q --no-cache-dir -r requirements.txt
 
 WORKDIR /
@@ -30,7 +30,7 @@ RUN mkdir -p data/
 COPY ./base_data_importer/ /base_data_importer
 COPY ./data_download/ /data_download
 COPY h3_data_importer /h3_data_importer
-COPY indicator_coefficient_importer/ /indicator_coefficient_importer
-COPY ./Makefile ./Makefile
+COPY indicator_coefficient_importer /indicator_coefficient_importer
+COPY Makefile ./Makefile
 
 ENTRYPOINT ["/usr/bin/make"]
diff --git a/data/Makefile b/data/importers/Makefile
similarity index 100%
rename from data/Makefile
rename to data/importers/Makefile
diff --git a/data/h3_data_importer/__init__.py b/data/importers/__init__.py
similarity index 100%
rename from data/h3_data_importer/__init__.py
rename to data/importers/__init__.py
diff --git a/data/base_data_importer/Makefile b/data/importers/base_data_importer/Makefile
similarity index 100%
rename from data/base_data_importer/Makefile
rename to data/importers/base_data_importer/Makefile
diff --git a/data/base_data_importer/README.md b/data/importers/base_data_importer/README.md
similarity index 100%
rename from data/base_data_importer/README.md
rename to data/importers/base_data_importer/README.md
diff --git a/data/base_data_importer/csv_to_table.py b/data/importers/base_data_importer/csv_to_table.py
similarity index 100%
rename from data/base_data_importer/csv_to_table.py
rename to data/importers/base_data_importer/csv_to_table.py
diff --git a/data/base_data_importer/data/1.units.csv b/data/importers/base_data_importer/data/1.units.csv
similarity index 100%
rename from data/base_data_importer/data/1.units.csv
rename to data/importers/base_data_importer/data/1.units.csv
diff --git a/data/base_data_importer/data/2.indicator.csv b/data/importers/base_data_importer/data/2.indicator.csv
similarity index 100%
rename from data/base_data_importer/data/2.indicator.csv
rename to data/importers/base_data_importer/data/2.indicator.csv
diff --git a/data/base_data_importer/data/3.unit_conversion.csv b/data/importers/base_data_importer/data/3.unit_conversion.csv
similarity index 100%
rename from data/base_data_importer/data/3.unit_conversion.csv
rename to data/importers/base_data_importer/data/3.unit_conversion.csv
diff --git a/data/base_data_importer/data/4.material.csv b/data/importers/base_data_importer/data/4.material.csv
similarity index 100%
rename from data/base_data_importer/data/4.material.csv
rename to data/importers/base_data_importer/data/4.material.csv
diff --git a/data/data.sh b/data/importers/data.sh
similarity index 100%
rename from data/data.sh
rename to data/importers/data.sh
diff --git a/data/data_download/Makefile b/data/importers/data_download/Makefile
similarity index 100%
rename from data/data_download/Makefile
rename to data/importers/data_download/Makefile
diff --git a/data/data_download/countriesregions.csv b/data/importers/data_download/countriesregions.csv
similarity index 100%
rename from data/data_download/countriesregions.csv
rename to data/importers/data_download/countriesregions.csv
diff --git a/data/data_download/populate_admin_regions.sql b/data/importers/data_download/populate_admin_regions.sql
similarity index 100%
rename from data/data_download/populate_admin_regions.sql
rename to data/importers/data_download/populate_admin_regions.sql
diff --git a/data/docker-compose.yml b/data/importers/docker-compose.yml
similarity index 78%
rename from data/docker-compose.yml
rename to data/importers/docker-compose.yml
index 26b89fdef..fe20c09b8 100644
--- a/data/docker-compose.yml
+++ b/data/importers/docker-compose.yml
@@ -2,9 +2,9 @@ version: "3.8"
 services:
   landgriffon-seed-data:
     build:
-      context: ./
+      context: ..
       dockerfile: Dockerfile
     command: seed-data
     env_file:
-      - '../.env'
+      - '../../.env'
     network_mode: "host"
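With docker-compose.yml and the Dockerfile relocated under data/importers/, the compose service builds from the parent directory (context: ..) and reads the repository-level .env two levels up. A minimal invocation sketch, assuming Docker Compose v2 and that the commands are run from data/importers/ where the file now lives:

    cd data/importers
    docker compose build landgriffon-seed-data     # build the importer image from the configured context
    docker compose run --rm landgriffon-seed-data  # ENTRYPOINT is /usr/bin/make, so the configured command runs `make seed-data`
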
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch3 CFs_Climate change v01.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch3 CFs_Climate change v01.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch3 CFs_Climate change v01.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch3 CFs_Climate change v01.xlsx
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch4_IntakeFractions+CharacterizationFactors v01.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch4_IntakeFractions+CharacterizationFactors v01.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch4_IntakeFractions+CharacterizationFactors v01.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch4_IntakeFractions+CharacterizationFactors v01.xlsx
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE - Watersheds.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE - Watersheds.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE - Watersheds.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE - Watersheds.xlsx
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE_country_regions_world_april2016.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE_country_regions_world_april2016.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE_country_regions_world_april2016.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE_country_regions_world_april2016.xlsx
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch5.2_Water-HumanHealthCFlist_2016-12-12 v01.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch5.2_Water-HumanHealthCFlist_2016-12-12 v01.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch5.2_Water-HumanHealthCFlist_2016-12-12 v01.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch5.2_Water-HumanHealthCFlist_2016-12-12 v01.xlsx
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch6 PSLglobal v01.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch6 PSLglobal v01.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch6 PSLglobal v01.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch6 PSLglobal v01.xlsx
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSL_regional_ecoregions_v01.csv b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSL_regional_ecoregions_v01.csv
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSL_regional_ecoregions_v01.csv
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSL_regional_ecoregions_v01.csv
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSLregional_v01.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSLregional_v01.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSLregional_v01.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSLregional_v01.xlsx
diff --git a/data/h3_data_importer/Makefile b/data/importers/h3_data_importer/Makefile
similarity index 100%
rename from data/h3_data_importer/Makefile
rename to data/importers/h3_data_importer/Makefile
diff --git a/data/h3_data_importer/README.md b/data/importers/h3_data_importer/README.md
similarity index 100%
rename from data/h3_data_importer/README.md
rename to data/importers/h3_data_importer/README.md
diff --git a/data/indicator_coefficient_importer/__init__.py b/data/importers/h3_data_importer/__init__.py similarity index 100% rename from data/indicator_coefficient_importer/__init__.py rename to data/importers/h3_data_importer/__init__.py diff --git a/data/h3_data_importer/cog_to_contextual_layer_linker.py b/data/importers/h3_data_importer/cog_to_contextual_layer_linker.py similarity index 100% rename from data/h3_data_importer/cog_to_contextual_layer_linker.py rename to data/importers/h3_data_importer/cog_to_contextual_layer_linker.py diff --git a/data/h3_data_importer/commodity_mapping/commodity_tif_mapping_v1_20210809 - commodities.csv b/data/importers/h3_data_importer/commodity_mapping/commodity_tif_mapping_v1_20210809 - commodities.csv similarity index 100% rename from data/h3_data_importer/commodity_mapping/commodity_tif_mapping_v1_20210809 - commodities.csv rename to data/importers/h3_data_importer/commodity_mapping/commodity_tif_mapping_v1_20210809 - commodities.csv diff --git a/data/h3_data_importer/contextual_layers_metadata/bluewater_footprint_metadata.json b/data/importers/h3_data_importer/contextual_layers_metadata/bluewater_footprint_metadata.json similarity index 100% rename from data/h3_data_importer/contextual_layers_metadata/bluewater_footprint_metadata.json rename to data/importers/h3_data_importer/contextual_layers_metadata/bluewater_footprint_metadata.json diff --git a/data/h3_data_importer/contextual_layers_metadata/contextual_metadata_schema.json b/data/importers/h3_data_importer/contextual_layers_metadata/contextual_metadata_schema.json similarity index 100% rename from data/h3_data_importer/contextual_layers_metadata/contextual_metadata_schema.json rename to data/importers/h3_data_importer/contextual_layers_metadata/contextual_metadata_schema.json diff --git a/data/h3_data_importer/contextual_layers_metadata/h3_grid_aqueduct_global_metadata.json b/data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_aqueduct_global_metadata.json similarity index 100% rename from data/h3_data_importer/contextual_layers_metadata/h3_grid_aqueduct_global_metadata.json rename to data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_aqueduct_global_metadata.json diff --git a/data/h3_data_importer/contextual_layers_metadata/h3_grid_bio_global_metadata.json b/data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_bio_global_metadata.json similarity index 100% rename from data/h3_data_importer/contextual_layers_metadata/h3_grid_bio_global_metadata.json rename to data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_bio_global_metadata.json diff --git a/data/h3_data_importer/contextual_layers_metadata/h3_grid_carbon_global_metadata.json b/data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_carbon_global_metadata.json similarity index 100% rename from data/h3_data_importer/contextual_layers_metadata/h3_grid_carbon_global_metadata.json rename to data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_carbon_global_metadata.json diff --git a/data/h3_data_importer/contextual_layers_metadata/h3_grid_deforestation_global_metadata.json b/data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_deforestation_global_metadata.json similarity index 100% rename from data/h3_data_importer/contextual_layers_metadata/h3_grid_deforestation_global_metadata.json rename to data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_deforestation_global_metadata.json diff --git 
a/data/h3_data_importer/contextual_layers_metadata/h3_grid_hdi_global_metadata.json b/data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_hdi_global_metadata.json similarity index 100% rename from data/h3_data_importer/contextual_layers_metadata/h3_grid_hdi_global_metadata.json rename to data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_hdi_global_metadata.json diff --git a/data/h3_data_importer/csv_to_h3_table.py b/data/importers/h3_data_importer/csv_to_h3_table.py similarity index 100% rename from data/h3_data_importer/csv_to_h3_table.py rename to data/importers/h3_data_importer/csv_to_h3_table.py diff --git a/data/h3_data_importer/data_checksums/deforestation b/data/importers/h3_data_importer/data_checksums/deforestation similarity index 100% rename from data/h3_data_importer/data_checksums/deforestation rename to data/importers/h3_data_importer/data_checksums/deforestation diff --git a/data/h3_data_importer/data_checksums/forestGHG b/data/importers/h3_data_importer/data_checksums/forestGHG similarity index 100% rename from data/h3_data_importer/data_checksums/forestGHG rename to data/importers/h3_data_importer/data_checksums/forestGHG diff --git a/data/h3_data_importer/data_checksums/mapspam_ha b/data/importers/h3_data_importer/data_checksums/mapspam_ha similarity index 100% rename from data/h3_data_importer/data_checksums/mapspam_ha rename to data/importers/h3_data_importer/data_checksums/mapspam_ha diff --git a/data/h3_data_importer/data_checksums/mapspam_prod b/data/importers/h3_data_importer/data_checksums/mapspam_prod similarity index 100% rename from data/h3_data_importer/data_checksums/mapspam_prod rename to data/importers/h3_data_importer/data_checksums/mapspam_prod diff --git a/data/h3_data_importer/raster_folder_to_h3_table.py b/data/importers/h3_data_importer/raster_folder_to_h3_table.py similarity index 100% rename from data/h3_data_importer/raster_folder_to_h3_table.py rename to data/importers/h3_data_importer/raster_folder_to_h3_table.py diff --git a/data/h3_data_importer/utils.py b/data/importers/h3_data_importer/utils.py similarity index 100% rename from data/h3_data_importer/utils.py rename to data/importers/h3_data_importer/utils.py diff --git a/data/h3_data_importer/vector_folder_to_h3_table.py b/data/importers/h3_data_importer/vector_folder_to_h3_table.py similarity index 100% rename from data/h3_data_importer/vector_folder_to_h3_table.py rename to data/importers/h3_data_importer/vector_folder_to_h3_table.py diff --git a/data/indicator_coefficient_importer/Makefile b/data/importers/indicator_coefficient_importer/Makefile similarity index 100% rename from data/indicator_coefficient_importer/Makefile rename to data/importers/indicator_coefficient_importer/Makefile diff --git a/data/notebooks/Final/.gitkeep b/data/importers/indicator_coefficient_importer/__init__.py similarity index 100% rename from data/notebooks/Final/.gitkeep rename to data/importers/indicator_coefficient_importer/__init__.py diff --git a/data/indicator_coefficient_importer/data/bwfp_indicator_coefficients.csv b/data/importers/indicator_coefficient_importer/data/bwfp_indicator_coefficients.csv similarity index 100% rename from data/indicator_coefficient_importer/data/bwfp_indicator_coefficients.csv rename to data/importers/indicator_coefficient_importer/data/bwfp_indicator_coefficients.csv diff --git a/data/indicator_coefficient_importer/indicator_coefficient_importer.py b/data/importers/indicator_coefficient_importer/indicator_coefficient_importer.py similarity index 
100% rename from data/indicator_coefficient_importer/indicator_coefficient_importer.py rename to data/importers/indicator_coefficient_importer/indicator_coefficient_importer.py diff --git a/data/requirements.txt b/data/importers/requirements.txt similarity index 100% rename from data/requirements.txt rename to data/importers/requirements.txt diff --git a/data/notebooks/Lab/0_1_Crop_data_exploration_and_upscaling.ipynb b/data/notebooks/Lab/0_1_Crop_data_exploration_and_upscaling.ipynb index adb972ea6..dae84cad6 100644 --- a/data/notebooks/Lab/0_1_Crop_data_exploration_and_upscaling.ipynb +++ b/data/notebooks/Lab/0_1_Crop_data_exploration_and_upscaling.ipynb @@ -15,17 +15,19 @@ } ], "source": [ - "import pandas as pd\n", - "import geopandas as gpd\n", "import csv\n", - "import requests\n", - "import zipfile\n", - "import os\n", "import io\n", - "import seaborn as sns\n", + "import os\n", + "import zipfile\n", + "\n", + "import geopandas as gpd\n", "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", "import rasterio as rio\n", "import rasterio.plot\n", + "import requests\n", + "import seaborn as sns\n", + "\n", "%matplotlib inline" ] }, @@ -189,7 +191,7 @@ } ], "source": [ - "processed_data = gpd.read_file('../../datasets/processed/user_data/located_lg_data_point_v2.shp')\n", + "processed_data = gpd.read_file(\"../../datasets/processed/user_data/located_lg_data_point_v2.shp\")\n", "processed_data.head()" ] }, @@ -210,7 +212,6 @@ } ], "source": [ - "\n", "sns.set_style(style=\"darkgrid\")\n", "ax = sns.countplot(x=\"Material\", data=processed_data)" ] @@ -236,7 +237,7 @@ } ], "source": [ - "processed_data.groupby('Material')['Volume'].sum()" + "processed_data.groupby(\"Material\")[\"Volume\"].sum()" ] }, { @@ -285,12 +286,12 @@ "with requests.Session() as s:\n", " download = s.get(url)\n", "\n", - " decoded_content = download.content.decode('utf-8')\n", + " decoded_content = download.content.decode(\"utf-8\")\n", "\n", - " data = csv.reader(decoded_content.splitlines(), delimiter=',')\n", + " data = csv.reader(decoded_content.splitlines(), delimiter=\",\")\n", " my_list = list(data)\n", - " \n", - " \n", + "\n", + "\n", "FAO_df = pd.DataFrame(my_list, columns=my_list[0])" ] }, @@ -470,9 +471,9 @@ "with requests.Session() as s:\n", " download = s.get(url)\n", "\n", - " decoded_content = download.content.decode('utf-8')\n", + " decoded_content = download.content.decode(\"utf-8\")\n", "\n", - " data = csv.reader(decoded_content.splitlines(), delimiter=',')\n", + " data = csv.reader(decoded_content.splitlines(), delimiter=\",\")\n", " my_list = list(data)\n", "FAO_indonesia_df = pd.DataFrame(my_list, columns=my_list[0])" ] @@ -962,15 +963,15 @@ "# http://www.earthstat.org/harvested-area-yield-175-crops/\n", "url = \"https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/cotton_HarvAreaYield_Geotiff.zip\"\n", "\n", - "local_path = '../raw_data/cotton_earthstat'\n", + "local_path = \"../raw_data/cotton_earthstat\"\n", "if not os.path.isdir(local_path):\n", " os.mkdir(local_path)\n", "\n", - " print('Downloading shapefile...')\n", + " print(\"Downloading shapefile...\")\n", " r = requests.get(url)\n", " z = zipfile.ZipFile(io.BytesIO(r.content))\n", " print(\"Done\")\n", - " z.extractall(path=local_path) # extract to folder\n", + " z.extractall(path=local_path) # extract to folder\n", " print(\"Data extracted!\")" ] }, @@ -994,12 +995,13 @@ ], "source": [ "# Use rasterio to import the reprojected data as img\n", - "out_path = 
'../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif'\n", + "out_path = (\n", + " \"../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif\"\n", + ")\n", "with rio.open(out_path) as src:\n", " arr = src.read(out_shape=(src.height, src.width))\n", "\n", "\n", - "\n", "plt.imshow(arr[0])\n", "plt.show()" ] @@ -1022,15 +1024,15 @@ "source": [ "# get country data from\n", "url = \"https://biogeo.ucdavis.edu/data/gadm3.6/shp/gadm36_IDN_shp.zip\"\n", - "local_path = '../raw_data/gadm_indonesia'\n", + "local_path = \"../raw_data/gadm_indonesia\"\n", "if not os.path.isdir(local_path):\n", " os.mkdir(local_path)\n", "\n", - " print('Downloading shapefile...')\n", + " print(\"Downloading shapefile...\")\n", " r = requests.get(url)\n", " z = zipfile.ZipFile(io.BytesIO(r.content))\n", " print(\"Done\")\n", - " z.extractall(path=local_path) # extract to folder\n", + " z.extractall(path=local_path) # extract to folder\n", " print(\"Data extracted!\")" ] }, @@ -1108,7 +1110,7 @@ } ], "source": [ - "gadm_ind = gpd.read_file('../raw_data/gadm_indonesia/gadm36_IDN_0.shp')\n", + "gadm_ind = gpd.read_file(\"../raw_data/gadm_indonesia/gadm36_IDN_0.shp\")\n", "gadm_ind.head()" ] }, @@ -1173,14 +1175,16 @@ } ], "source": [ - "with rio.open('../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif') as src:\n", + "with rio.open(\n", + " \"../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif\"\n", + ") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[10,5])\n", - " ax.set_ylim((-10,8))\n", - " ax.set_xlim((90,145))\n", - " rio.plot.show(dat, vmax=100, cmap='BrBG', ax=ax, transform=src.transform)\n", - " gadm_ind.plot(ax=ax, color='', edgecolor='yellow')\n", - " ax.set_title('Cotton production in Indonesia (green: higher production)')" + " fig, ax = plt.subplots(figsize=[10, 5])\n", + " ax.set_ylim((-10, 8))\n", + " ax.set_xlim((90, 145))\n", + " rio.plot.show(dat, vmax=100, cmap=\"BrBG\", ax=ax, transform=src.transform)\n", + " gadm_ind.plot(ax=ax, color=\"\", edgecolor=\"yellow\")\n", + " ax.set_title(\"Cotton production in Indonesia (green: higher production)\")" ] }, { @@ -1198,7 +1202,7 @@ } ], "source": [ - "feature = RetrieveBoundaries(query='India')" + "feature = RetrieveBoundaries(query=\"India\")" ] }, { @@ -1263,8 +1267,7 @@ } ], "source": [ - "\n", - "gdf = gpd.GeoDataFrame.from_features(feature.feature_json, crs='epsg:4326')\n", + "gdf = gpd.GeoDataFrame.from_features(feature.feature_json, crs=\"epsg:4326\")\n", "gdf" ] }, @@ -1327,14 +1330,16 @@ } ], "source": [ - "with rio.open('../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif') as src:\n", + "with rio.open(\n", + " \"../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif\"\n", + ") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((-5,40))\n", - " ax.set_xlim((60,100))\n", - " rio.plot.show(dat, vmax=100, cmap='BrBG', ax=ax, transform=src.transform)\n", - " gdf.plot(ax=ax, color='', edgecolor='yellow')\n", - " ax.set_title('Cotton production in India (green: higher production)')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((-5, 40))\n", + " ax.set_xlim((60, 100))\n", + " rio.plot.show(dat, vmax=100, cmap=\"BrBG\", ax=ax, transform=src.transform)\n", + " gdf.plot(ax=ax, color=\"\", edgecolor=\"yellow\")\n", + " ax.set_title(\"Cotton 
production in India (green: higher production)\")" ] }, { @@ -1372,8 +1377,8 @@ ], "source": [ "## get upscaling factor for indonesia\n", - "upscaling_factor = float(FAO_indonesia_df.iloc[1].Value)/float(FAO_indonesia_df.iloc[0].Value)\n", - "print(f'The upscaling factor is: {upscaling_factor}')" + "upscaling_factor = float(FAO_indonesia_df.iloc[1].Value) / float(FAO_indonesia_df.iloc[0].Value)\n", + "print(f\"The upscaling factor is: {upscaling_factor}\")" ] }, { @@ -1456,7 +1461,7 @@ ], "source": [ "## ad upscaling factor to admin area\n", - "gadm_ind['scl_factor']=upscaling_factor\n", + "gadm_ind[\"scl_factor\"] = upscaling_factor\n", "gadm_ind" ] }, @@ -1482,8 +1487,10 @@ } ], "source": [ - "#generate a raster mask with value 1 for the harvest area tiff\n", - "with rio.open('../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif') as src:\n", + "# generate a raster mask with value 1 for the harvest area tiff\n", + "with rio.open(\n", + " \"../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif\"\n", + ") as src:\n", " print(src.profile)" ] }, @@ -1519,8 +1526,8 @@ } ], "source": [ - "#check the mask\n", - "with rio.open('../raw_data/harvest_area_scale_factor_mask_v2.tif') as src:\n", + "# check the mask\n", + "with rio.open(\"../raw_data/harvest_area_scale_factor_mask_v2.tif\") as src:\n", " print(src.profile)" ] }, @@ -1544,12 +1551,11 @@ ], "source": [ "# Use rasterio to import the reprojected data as img\n", - "out_path = '../raw_data/harvest_area_scale_factor_mask_v2.tif'\n", + "out_path = \"../raw_data/harvest_area_scale_factor_mask_v2.tif\"\n", "with rio.open(out_path) as src:\n", " arr = src.read(out_shape=(src.height, src.width))\n", "\n", "\n", - "\n", "plt.imshow(arr[0])\n", "plt.show()" ] @@ -1612,16 +1618,14 @@ } ], "source": [ - "\n", - "\n", - "with rio.open('../Processed_data/cotton_2001_harvest_area.tif') as src:\n", + "with rio.open(\"../Processed_data/cotton_2001_harvest_area.tif\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[10,5])\n", - " ax.set_ylim((-10,8))\n", - " ax.set_xlim((90,145))\n", - " rio.plot.show(dat, vmax=100, cmap='BrBG', ax=ax, transform=src.transform)\n", - " gadm_ind.plot(ax=ax, color='', edgecolor='yellow')\n", - " ax.set_title('Cotton production in Indonesia 2001 (green: higher production)')" + " fig, ax = plt.subplots(figsize=[10, 5])\n", + " ax.set_ylim((-10, 8))\n", + " ax.set_xlim((90, 145))\n", + " rio.plot.show(dat, vmax=100, cmap=\"BrBG\", ax=ax, transform=src.transform)\n", + " gadm_ind.plot(ax=ax, color=\"\", edgecolor=\"yellow\")\n", + " ax.set_title(\"Cotton production in Indonesia 2001 (green: higher production)\")" ] }, { @@ -1666,17 +1670,17 @@ "metadata": {}, "outputs": [], "source": [ - "#download data from fao\n", - "#download country specific yield for cotton from 2000 to 2019\n", + "# download data from fao\n", + "# download country specific yield for cotton from 2000 to 2019\n", "\n", "url = \"http://fenixservices.fao.org/faostat/api/v1/en/data/QC?area=5000%3E&area_cs=FAO&element=2413&item=328&item_cs=FAO&year=2000%2C2001%2C2002%2C2003%2C2004%2C2005%2C2006%2C2007%2C2008%2C2009%2C2010%2C2011%2C2012%2C2013%2C2014%2C2015%2C2016%2C2017%2C2018%2C2019&show_codes=true&show_unit=true&show_flags=true&null_values=false&output_type=csv\"\n", "\n", "with requests.Session() as s:\n", " download = s.get(url)\n", "\n", - " decoded_content = download.content.decode('utf-8')\n", + " decoded_content = download.content.decode(\"utf-8\")\n", "\n", 
- " data = csv.reader(decoded_content.splitlines(), delimiter=',')\n", + " data = csv.reader(decoded_content.splitlines(), delimiter=\",\")\n", " my_list = list(data)\n", "FAO_df_2000_2019 = pd.DataFrame(my_list, columns=my_list[0])" ] @@ -1852,20 +1856,23 @@ "metadata": {}, "outputs": [], "source": [ - "#clean dataframe to get just one year\n", + "# clean dataframe to get just one year\n", "\n", - "unique_countries = list(FAO_df_2000_2019['Area'].unique())\n", - "unique_years = list(FAO_df_2000_2019['Year'].unique())\n", + "unique_countries = list(FAO_df_2000_2019[\"Area\"].unique())\n", + "unique_years = list(FAO_df_2000_2019[\"Year\"].unique())\n", "list_values = {}\n", "for country in unique_countries:\n", " list_values[country] = {}\n", " for year in unique_years:\n", " try:\n", - " value = float(FAO_df_2000_2019[(FAO_df_2000_2019['Area']==country) & (FAO_df_2000_2019['Year']==year)].iloc[0]['Value'])\n", + " value = float(\n", + " FAO_df_2000_2019[\n", + " (FAO_df_2000_2019[\"Area\"] == country) & (FAO_df_2000_2019[\"Year\"] == year)\n", + " ].iloc[0][\"Value\"]\n", + " )\n", " except:\n", - " value = 0 \n", - " list_values[country][year] = value\n", - " " + " value = 0\n", + " list_values[country][year] = value" ] }, { @@ -2280,8 +2287,8 @@ } ], "source": [ - "#calculate percentage change \n", - "fao_df_pf = fao_df_pf.pct_change(axis='columns')\n", + "# calculate percentage change\n", + "fao_df_pf = fao_df_pf.pct_change(axis=\"columns\")\n", "fao_df_pf.head()" ] }, @@ -2498,8 +2505,8 @@ } ], "source": [ - "fao_df_pf['mean'] = fao_df_pf.mean(axis=1)\n", - "fao_df_pf['median'] = fao_df_pf.median(axis=1)\n", + "fao_df_pf[\"mean\"] = fao_df_pf.mean(axis=1)\n", + "fao_df_pf[\"median\"] = fao_df_pf.median(axis=1)\n", "fao_df_pf.head()" ] }, @@ -2838,6 +2845,7 @@ ], "source": [ "import pandas_bokeh\n", + "\n", "pandas_bokeh.output_notebook()" ] }, @@ -2930,7 +2938,7 @@ } ], "source": [ - "fao_df_pf.plot_bokeh(kind='bar') " + "fao_df_pf.plot_bokeh(kind=\"bar\")" ] }, { @@ -3022,7 +3030,7 @@ } ], "source": [ - "fao_df_pf[['mean','median']].plot_bokeh(kind='bar') " + "fao_df_pf[[\"mean\", \"median\"]].plot_bokeh(kind=\"bar\")" ] }, { @@ -3114,7 +3122,7 @@ } ], "source": [ - "fao_df_pf_transpose.plot_bokeh(kind='line')" + "fao_df_pf_transpose.plot_bokeh(kind=\"line\")" ] }, { @@ -3206,7 +3214,7 @@ } ], "source": [ - "fao_df_pf_transpose.loc[['mean', 'median']].plot_bokeh(kind='bar')" + "fao_df_pf_transpose.loc[[\"mean\", \"median\"]].plot_bokeh(kind=\"bar\")" ] }, { @@ -3298,7 +3306,9 @@ } ], "source": [ - "fao_df_pf_transpose.loc[['mean', 'median']][['Afghanistan', 'Albania', 'Algeria', 'Angola']].plot_bokeh(kind='bar')" + "fao_df_pf_transpose.loc[[\"mean\", \"median\"]][\n", + " [\"Afghanistan\", \"Albania\", \"Algeria\", \"Angola\"]\n", + "].plot_bokeh(kind=\"bar\")" ] }, { @@ -3390,7 +3400,7 @@ } ], "source": [ - "fao_df_pf.transpose()[['Afghanistan', 'Albania', 'Algeria', 'Angola']].plot_bokeh(kind='line')" + "fao_df_pf.transpose()[[\"Afghanistan\", \"Albania\", \"Algeria\", \"Angola\"]].plot_bokeh(kind=\"line\")" ] }, { diff --git a/data/notebooks/Lab/0_2_Manage_unknonw_locations.ipynb b/data/notebooks/Lab/0_2_Manage_unknonw_locations.ipynb index 1fdb14008..d53b31cbf 100644 --- a/data/notebooks/Lab/0_2_Manage_unknonw_locations.ipynb +++ b/data/notebooks/Lab/0_2_Manage_unknonw_locations.ipynb @@ -52,12 +52,10 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", + "import folium\n", "import geopandas as gpd\n", - "import os\n", + "import pandas as pd\n", "from 
IPython.display import Image\n", - "import requests\n", - "import folium\n", "from processing.geolocating_data import GeolocateAddress" ] }, @@ -189,7 +187,7 @@ } ], "source": [ - "input_data = pd.read_csv('../raw_data/LG fake data sheet - Raw materials.csv')\n", + "input_data = pd.read_csv(\"../raw_data/LG fake data sheet - Raw materials.csv\")\n", "input_data.head()" ] }, @@ -313,7 +311,7 @@ ], "source": [ "## get unknown locations\n", - "unknown_data = input_data[input_data['Location type'] =='Unknown']\n", + "unknown_data = input_data[input_data[\"Location type\"] == \"Unknown\"]\n", "unknown_data.head()" ] }, @@ -413,11 +411,11 @@ } ], "source": [ - "for i in range(0,len(unknown_data_china)):\n", + "for i in range(0, len(unknown_data_china)):\n", " row = unknown_data_china.iloc[0]\n", - " if row['Location type'] == 'Unknown':\n", - " country = row['Country']\n", - " commodity = row['Material']\n", + " if row[\"Location type\"] == \"Unknown\":\n", + " country = row[\"Country\"]\n", + " commodity = row[\"Material\"]\n", " print(country, commodity)" ] }, @@ -446,7 +444,7 @@ ], "source": [ "# image showing imports of rubber to chinna\n", - "Image(filename = \"../raw_data/Screenshot from 2021-04-30 11-29-40.png\", width = 900, height = 300)" + "Image(filename=\"../raw_data/Screenshot from 2021-04-30 11-29-40.png\", width=900, height=300)" ] }, { @@ -676,9 +674,11 @@ "## category 17 is rubber\n", "## TODO - get list of importers and categories\n", "\n", - "url = 'https://api.resourcetrade.earth/api/rt/2.3/downloads?year=2019&importer=156&category=17&units=value&autozoom=1'\n", + "url = \"https://api.resourcetrade.earth/api/rt/2.3/downloads?year=2019&importer=156&category=17&units=value&autozoom=1\"\n", "\n", - "df_imports = pd.read_excel (r'../raw_data/resourcetradeearth-all-156-17-2019.xlsx', sheet_name='Exporters')\n", + "df_imports = pd.read_excel(\n", + " r\"../raw_data/resourcetradeearth-all-156-17-2019.xlsx\", sheet_name=\"Exporters\"\n", + ")\n", "df_imports" ] }, @@ -895,8 +895,8 @@ } ], "source": [ - "#do the analysis just for one year - year 2019\n", - "df_imports_2019 = df_imports[df_imports['Year']==2019]\n", + "# do the analysis just for one year - year 2019\n", + "df_imports_2019 = df_imports[df_imports[\"Year\"] == 2019]\n", "df_imports_2019" ] }, @@ -915,7 +915,7 @@ } ], "source": [ - "print(f'There are {len(df_imports_2019)} exporters of rubber to chinna')" + "print(f\"There are {len(df_imports_2019)} exporters of rubber to chinna\")" ] }, { @@ -1535,20 +1535,20 @@ } ], "source": [ - "#retrieve the geometries for each country \n", + "# retrieve the geometries for each country\n", "geometry_list = []\n", "for i in range(0, len(df_imports_2019)):\n", " row = df_imports_2019.iloc[i]\n", - " country = row['Exporter']\n", + " country = row[\"Exporter\"]\n", " try:\n", " geolocation = GeolocateAddress(query=country)\n", - " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs='epsg:4326')\n", - " geom = gdf['geometry'].iloc[0]\n", + " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs=\"epsg:4326\")\n", + " geom = gdf[\"geometry\"].iloc[0]\n", " except:\n", - " print(f'Geolocation for the location {country} has failed!')\n", - " geom = 'None'\n", - " \n", - " geometry_list.append(geom)\n" + " print(f\"Geolocation for the location {country} has failed!\")\n", + " geom = \"None\"\n", + "\n", + " geometry_list.append(geom)" ] }, { @@ -1695,8 +1695,8 @@ } ], "source": [ - "#append geometry to gdf\n", - "df_imports_2019['Geometry'] = geometry_list\n", + 
"# append geometry to gdf\n", + "df_imports_2019[\"Geometry\"] = geometry_list\n", "df_imports_2019.head()" ] }, @@ -1783,7 +1783,7 @@ } ], "source": [ - "df_imports_2019[df_imports_2019['Geometry']=='None']" + "df_imports_2019[df_imports_2019[\"Geometry\"] == \"None\"]" ] }, { @@ -1918,8 +1918,8 @@ } ], "source": [ - "#remove no valid geoms (the none)\n", - "df_imports_2019 = df_imports_2019[df_imports_2019['Geometry']!='None']\n", + "# remove no valid geoms (the none)\n", + "df_imports_2019 = df_imports_2019[df_imports_2019[\"Geometry\"] != \"None\"]\n", "df_imports_2019.head()" ] }, @@ -1930,8 +1930,8 @@ "metadata": {}, "outputs": [], "source": [ - "#set geometry to gdf\n", - "df_imports_2019 = df_imports_2019.set_geometry('Geometry')" + "# set geometry to gdf\n", + "df_imports_2019 = df_imports_2019.set_geometry(\"Geometry\")" ] }, { @@ -1964,7 +1964,6 @@ } ], "source": [ - "\n", "df_imports_2019.plot()" ] }, @@ -1975,7 +1974,17 @@ "metadata": {}, "outputs": [], "source": [ - "df_imports_2019 = df_imports_2019[['Exporter ISO3', 'Exporter','Resource', 'Year', 'Value (1000USD)', 'Weight (1000kg)', 'Geometry' ]]" + "df_imports_2019 = df_imports_2019[\n", + " [\n", + " \"Exporter ISO3\",\n", + " \"Exporter\",\n", + " \"Resource\",\n", + " \"Year\",\n", + " \"Value (1000USD)\",\n", + " \"Weight (1000kg)\",\n", + " \"Geometry\",\n", + " ]\n", + "]" ] }, { @@ -1996,17 +2005,17 @@ } ], "source": [ - "#split geolocated data by polygon and points for saving\n", - "gdf_polygon = df_imports_2019[df_imports_2019['Geometry'].apply(lambda x : x.type!='Point' )]\n", - "gdf_point = df_imports_2019[df_imports_2019['Geometry'].apply(lambda x : x.type=='Point' )]\n", + "# split geolocated data by polygon and points for saving\n", + "gdf_polygon = df_imports_2019[df_imports_2019[\"Geometry\"].apply(lambda x: x.type != \"Point\")]\n", + "gdf_point = df_imports_2019[df_imports_2019[\"Geometry\"].apply(lambda x: x.type == \"Point\")]\n", "\n", "gdf_polygon.to_file(\n", - " '../Processed_data/china_2019_rubber_imports_polygon.shp',\n", - " driver='ESRI Shapefile',\n", + " \"../Processed_data/china_2019_rubber_imports_polygon.shp\",\n", + " driver=\"ESRI Shapefile\",\n", ")\n", "gdf_point.to_file(\n", - " '../Processed_data/china_2019_rubber_imports_point.shp',\n", - " driver='ESRI Shapefile',\n", + " \"../Processed_data/china_2019_rubber_imports_point.shp\",\n", + " driver=\"ESRI Shapefile\",\n", ")" ] }, @@ -2017,11 +2026,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf_polygon.sort_values(\n", - " 'Value (1000USD)',\n", - " ascending=False,\n", - " inplace = True\n", - ")" + "gdf_polygon.sort_values(\"Value (1000USD)\", ascending=False, inplace=True)" ] }, { @@ -2186,9 +2191,9 @@ } ], "source": [ - "top_exporters = top_exporters.set_crs('epsg:4326')\n", - "m = folium.Map(location=[43.062776, -75.420884],tiles=\"cartodbpositron\", zoom_start=7)\n", - "folium.GeoJson(data=top_exporters[\"Geometry\"]).add_to(m) \n", + "top_exporters = top_exporters.set_crs(\"epsg:4326\")\n", + "m = folium.Map(location=[43.062776, -75.420884], tiles=\"cartodbpositron\", zoom_start=7)\n", + "folium.GeoJson(data=top_exporters[\"Geometry\"]).add_to(m)\n", "m" ] }, @@ -2231,10 +2236,9 @@ } ], "source": [ - "Country_production_perct = (Export_quantity*100)/production\n", - "print(f'The country production is: {Country_production_perct} %')\n", - "print(f'The country import is: {100-Country_production_perct}%')\n", - " " + "Country_production_perct = (Export_quantity * 100) / production\n", + "print(f\"The country production is: 
{Country_production_perct} %\")\n", + "print(f\"The country import is: {100-Country_production_perct}%\")" ] }, { @@ -2264,14 +2268,14 @@ ], "source": [ "# total value in china for 2019 is 11377881.6251786\n", - "total_value_traders = sum(list(top_exporters['Value (1000USD)']))\n", - "print(f'The total value is: {total_value_traders}')\n", + "total_value_traders = sum(list(top_exporters[\"Value (1000USD)\"]))\n", + "print(f\"The total value is: {total_value_traders}\")\n", "\n", "weight_list = []\n", "for i in range(0, len(top_exporters)):\n", " row = top_exporters.iloc[i]\n", - " value = row['Value (1000USD)']\n", - " weight_value = value/ total_value_traders\n", + " value = row[\"Value (1000USD)\"]\n", + " weight_value = value / total_value_traders\n", " weight_list.append(weight_value)" ] }, @@ -2293,7 +2297,7 @@ } ], "source": [ - "#check that total weight is 1\n", + "# check that total weight is 1\n", "sum(weight_list)" ] }, @@ -2317,7 +2321,7 @@ } ], "source": [ - "top_exporters['Weight_value'] = weight_list" + "top_exporters[\"Weight_value\"] = weight_list" ] }, { @@ -2494,8 +2498,8 @@ ], "source": [ "top_exporters.to_file(\n", - " '../Processed_data/china_2019_rubber_imports_topExprters.shp',\n", - " driver='ESRI Shapefile',\n", + " \"../Processed_data/china_2019_rubber_imports_topExprters.shp\",\n", + " driver=\"ESRI Shapefile\",\n", ")" ] }, diff --git a/data/notebooks/Lab/0_3_H3_Data_Exploration.ipynb b/data/notebooks/Lab/0_3_H3_Data_Exploration.ipynb index 4ab2bddc4..7b62b1907 100644 --- a/data/notebooks/Lab/0_3_H3_Data_Exploration.ipynb +++ b/data/notebooks/Lab/0_3_H3_Data_Exploration.ipynb @@ -39,7 +39,7 @@ ], "source": [ "# insert code here\n", - "!pip install h3\n" + "!pip install h3" ] }, { @@ -55,22 +55,16 @@ "metadata": {}, "outputs": [], "source": [ - "#import library\n", + "# import library\n", "\n", "import json\n", - "import pandas as pd\n", - "from pandas.io.json import json_normalize\n", - "import numpy as np\n", "\n", - "import statistics\n", - "import statsmodels as sm\n", - "import statsmodels.formula.api as sm_formula\n", - "from scipy import stats\n", + "import pandas as pd\n", "\n", - "#import tensorflow as tf\n", - "#from tensorflow.keras import layers, models\n", + "# import tensorflow as tf\n", + "# from tensorflow.keras import layers, models\n", "#\n", - "#print(tf.__version__)" + "# print(tf.__version__)" ] }, { @@ -80,7 +74,8 @@ "outputs": [], "source": [ "import warnings\n", - "warnings.filterwarnings('ignore')" + "\n", + "warnings.filterwarnings(\"ignore\")" ] }, { @@ -89,16 +84,14 @@ "metadata": {}, "outputs": [], "source": [ - "import h3\n", - "\n", "import geopandas as gpd\n", - "\n", + "import h3\n", + "from geojson.feature import *\n", "from shapely import geometry, ops\n", - "#import libpysal as pys\n", - "#import esda\n", - "#import pointpats as pp\n", "\n", - "from geojson.feature import *" + "# import libpysal as pys\n", + "# import esda\n", + "# import pointpats as pp" ] }, { @@ -107,21 +100,13 @@ "metadata": {}, "outputs": [], "source": [ - "#from annoy import AnnoyIndex\n", - "\n", - "import bisect\n", - "import itertools\n", - "#from more_itertools import unique_everseen\n", + "# from annoy import AnnoyIndex\n", "\n", "import math\n", - "import random\n", - "import decimal\n", "from collections import Counter\n", "\n", - "from pprint import pprint\n", - "import copy\n", "\n", - "from tqdm import tqdm\n" + "# from more_itertools import unique_everseen" ] }, { @@ -130,22 +115,12 @@ "metadata": {}, "outputs": [], "source": [ - "#import 
pydeck\n", + "# import pydeck\n", "\n", - "from folium import Map, Marker, GeoJson\n", - "from folium.plugins import MarkerCluster\n", - "import branca.colormap as cm\n", - "from branca.colormap import linear\n", "import folium\n", - "\n", - "import seaborn as sns\n", - "\n", - "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", - "from matplotlib.pyplot import imshow\n", - "import matplotlib.gridspec as gridspec\n", - "\n", - "from PIL import Image as pilim\n", + "from folium import GeoJson, Map, Marker\n", + "from folium.plugins import MarkerCluster\n", "\n", "%matplotlib inline" ] @@ -406,28 +381,30 @@ "list_hex_area_sqm = []\n", "\n", "for i in range(0, max_res + 1):\n", - " ekm = h3.edge_length(resolution=i, unit='km')\n", - " em = h3.edge_length(resolution=i, unit='m')\n", + " ekm = h3.edge_length(resolution=i, unit=\"km\")\n", + " em = h3.edge_length(resolution=i, unit=\"m\")\n", " list_hex_edge_km.append(round(ekm, 3))\n", " list_hex_edge_m.append(round(em, 3))\n", " list_hex_perimeter_km.append(round(6 * ekm, 3))\n", " list_hex_perimeter_m.append(round(6 * em, 3))\n", "\n", - " akm = h3.hex_area(resolution=i, unit='km^2')\n", - " am = h3.hex_area(resolution=i, unit='m^2')\n", + " akm = h3.hex_area(resolution=i, unit=\"km^2\")\n", + " am = h3.hex_area(resolution=i, unit=\"m^2\")\n", " list_hex_area_sqkm.append(round(akm, 3))\n", " list_hex_area_sqm.append(round(am, 3))\n", "\n", - "df_meta = pd.DataFrame({\"edge_length_km\": list_hex_edge_km,\n", - " \"perimeter_km\": list_hex_perimeter_km,\n", - " \"area_sqkm\": list_hex_area_sqkm,\n", - " \"edge_length_m\": list_hex_edge_m,\n", - " \"perimeter_m\": list_hex_perimeter_m,\n", - " \"area_sqm\": list_hex_area_sqm\n", - " })\n", + "df_meta = pd.DataFrame(\n", + " {\n", + " \"edge_length_km\": list_hex_edge_km,\n", + " \"perimeter_km\": list_hex_perimeter_km,\n", + " \"area_sqkm\": list_hex_area_sqkm,\n", + " \"edge_length_m\": list_hex_edge_m,\n", + " \"perimeter_m\": list_hex_perimeter_m,\n", + " \"area_sqm\": list_hex_area_sqm,\n", + " }\n", + ")\n", "\n", - "df_meta[[\"edge_length_km\", \"perimeter_km\", \"area_sqkm\", \n", - " \"edge_length_m\", \"perimeter_m\", \"area_sqm\"]]" + "df_meta[[\"edge_length_km\", \"perimeter_km\", \"area_sqkm\", \"edge_length_m\", \"perimeter_m\", \"area_sqm\"]]" ] }, { @@ -649,36 +626,29 @@ } ], "source": [ - "lat_centr_point = -0.25 \n", + "lat_centr_point = -0.25\n", "lon_centr_point = 112.43\n", - "#43.600378, 1.445478\n", + "# 43.600378, 1.445478\n", "list_hex_res = []\n", "list_hex_res_geom = []\n", "list_res = range(0, max_res + 1)\n", "\n", "for resolution in range(0, max_res + 1):\n", " # index the point in the H3 hexagon of given index resolution\n", - " h = h3.geo_to_h3(lat = lat_centr_point,\n", - " lng = lon_centr_point,\n", - " resolution = resolution\n", - " )\n", + " h = h3.geo_to_h3(lat=lat_centr_point, lng=lon_centr_point, resolution=resolution)\n", "\n", " list_hex_res.append(h)\n", " # get the geometry of the hexagon and convert to geojson\n", - " h_geom = {\"type\": \"Polygon\",\n", - " \"coordinates\": [h3.h3_to_geo_boundary(h = h, geo_json = True)]\n", - " }\n", + " h_geom = {\"type\": \"Polygon\", \"coordinates\": [h3.h3_to_geo_boundary(h=h, geo_json=True)]}\n", " list_hex_res_geom.append(h_geom)\n", "\n", "\n", - "df_res_point = pd.DataFrame({\"res\": list_res,\n", - " \"hex_id\": list_hex_res,\n", - " \"geometry\": list_hex_res_geom\n", - " })\n", - "df_res_point[\"hex_id_binary\"] = df_res_point[\"hex_id\"].apply(\n", - " lambda x: bin(int(x, 16))[2:])\n", + 
"df_res_point = pd.DataFrame(\n", + " {\"res\": list_res, \"hex_id\": list_hex_res, \"geometry\": list_hex_res_geom}\n", + ")\n", + "df_res_point[\"hex_id_binary\"] = df_res_point[\"hex_id\"].apply(lambda x: bin(int(x, 16))[2:])\n", "\n", - "pd.set_option('display.max_colwidth', 63)\n", + "pd.set_option(\"display.max_colwidth\", 63)\n", "df_res_point" ] }, @@ -712,20 +682,21 @@ } ], "source": [ - "map_example = Map(location = [-0.25 , 112.43],\n", - " zoom_start = 5.5,\n", - " tiles = \"cartodbpositron\",\n", - " attr = '''© \n", + "map_example = Map(\n", + " location=[-0.25, 112.43],\n", + " zoom_start=5.5,\n", + " tiles=\"cartodbpositron\",\n", + " attr=\"\"\"© \n", " OpenStreetMapcontributors ©\n", " \n", - " CartoDB'''\n", - " )\n", + " CartoDB\"\"\",\n", + ")\n", "\n", "list_features = []\n", "for i, row in df_res_point.iterrows():\n", - " feature = Feature(geometry = row[\"geometry\"],\n", - " id = row[\"hex_id\"],\n", - " properties = {\"resolution\": int(row[\"res\"])})\n", + " feature = Feature(\n", + " geometry=row[\"geometry\"], id=row[\"hex_id\"], properties={\"resolution\": int(row[\"res\"])}\n", + " )\n", " list_features.append(feature)\n", "\n", "feat_collection = FeatureCollection(list_features)\n", @@ -733,19 +704,17 @@ "\n", "\n", "GeoJson(\n", - " geojson_result,\n", - " style_function = lambda feature: {\n", - " 'fillColor': None,\n", - " 'color': (\"green\"\n", - " if feature['properties']['resolution'] % 2 == 0\n", - " else \"red\"),\n", - " 'weight': 2,\n", - " 'fillOpacity': 0.05\n", - " },\n", - " name = \"Example\"\n", - " ).add_to(map_example)\n", + " geojson_result,\n", + " style_function=lambda feature: {\n", + " \"fillColor\": None,\n", + " \"color\": (\"green\" if feature[\"properties\"][\"resolution\"] % 2 == 0 else \"red\"),\n", + " \"weight\": 2,\n", + " \"fillOpacity\": 0.05,\n", + " },\n", + " name=\"Example\",\n", + ").add_to(map_example)\n", "\n", - "map_example.save('maps/1_resolutions.html')\n", + "map_example.save(\"maps/1_resolutions.html\")\n", "map_example" ] }, @@ -777,25 +746,19 @@ ], "source": [ "res_parent = 9\n", - "h3_cell_parent = h3.geo_to_h3(lat = lat_centr_point,\n", - " lng = lon_centr_point,\n", - " resolution = res_parent\n", - " )\n", - "h3_cells_children = list(h3.h3_to_children(h = h3_cell_parent))\n", - "assert(len(h3_cells_children) == math.pow(7, 1))\n", + "h3_cell_parent = h3.geo_to_h3(lat=lat_centr_point, lng=lon_centr_point, resolution=res_parent)\n", + "h3_cells_children = list(h3.h3_to_children(h=h3_cell_parent))\n", + "assert len(h3_cells_children) == math.pow(7, 1)\n", "# ------\n", - "h3_cells_grandchildren = list(h3.h3_to_children(h = h3_cell_parent, \n", - " res = res_parent + 2))\n", - "assert(len(h3_cells_grandchildren) == math.pow(7, 2))\n", + "h3_cells_grandchildren = list(h3.h3_to_children(h=h3_cell_parent, res=res_parent + 2))\n", + "assert len(h3_cells_grandchildren) == math.pow(7, 2)\n", "# ------\n", - "h3_cells_2xgrandchildren = list(h3.h3_to_children(h = h3_cell_parent, \n", - " res = res_parent + 3))\n", - "assert(len(h3_cells_2xgrandchildren) == math.pow(7, 3))\n", + "h3_cells_2xgrandchildren = list(h3.h3_to_children(h=h3_cell_parent, res=res_parent + 3))\n", + "assert len(h3_cells_2xgrandchildren) == math.pow(7, 3)\n", "\n", "# ------\n", - "h3_cells_3xgrandchildren = list(h3.h3_to_children(h = h3_cell_parent, \n", - " res = res_parent + 4))\n", - "assert(len(h3_cells_3xgrandchildren) == math.pow(7, 4))\n", + "h3_cells_3xgrandchildren = list(h3.h3_to_children(h=h3_cell_parent, res=res_parent + 4))\n", + 
"assert len(h3_cells_3xgrandchildren) == math.pow(7, 4)\n", "# ------\n", "\n", "msg_ = \"\"\"Parent cell: {} has :\n", @@ -803,10 +766,15 @@ " {} grandchildren,\n", " {} grandgrandchildren, \n", " {} grandgrandgrandchildren\"\"\"\n", - "print(msg_.format(h3_cell_parent, len(h3_cells_children),\n", - " len(h3_cells_grandchildren), \n", - " len(h3_cells_2xgrandchildren),\n", - " len(h3_cells_3xgrandchildren)))" + "print(\n", + " msg_.format(\n", + " h3_cell_parent,\n", + " len(h3_cells_children),\n", + " len(h3_cells_grandchildren),\n", + " len(h3_cells_2xgrandchildren),\n", + " len(h3_cells_3xgrandchildren),\n", + " )\n", + ")" ] }, { @@ -816,56 +784,59 @@ "outputs": [], "source": [ "def plot_parent_and_descendents(h3_cell_parent, h3_cells_children, ax=None):\n", - " \n", " list_distances_to_center = []\n", - " \n", + "\n", " if ax is None:\n", - " fig, ax = plt.subplots(1, 1, figsize = (5, 5))\n", - " \n", + " fig, ax = plt.subplots(1, 1, figsize=(5, 5))\n", + "\n", " boundary_parent_coords = h3.h3_to_geo_boundary(h=h3_cell_parent, geo_json=True)\n", " boundary_parent = geometry.Polygon(boundary_parent_coords)\n", " # print(boundary_parent.wkt, \"\\n\")\n", " res_parent = h3.h3_get_resolution(h3_cell_parent)\n", - " \n", + "\n", " # get the central descendent at the resolution of h3_cells_children\n", " res_children = h3.h3_get_resolution(h3_cells_children[0])\n", - " centerhex = h3.h3_to_center_child(h = h3_cell_parent, res = res_children)\n", + " centerhex = h3.h3_to_center_child(h=h3_cell_parent, res=res_children)\n", "\n", " # get the boundary of the multipolygon of the H3 cells union\n", " boundary_children_union_coords = h3.h3_set_to_multi_polygon(\n", - " hexes = h3_cells_children,\n", - " geo_json = True)[0][0]\n", + " hexes=h3_cells_children, geo_json=True\n", + " )[0][0]\n", " # close the linestring\n", " boundary_children_union_coords.append(boundary_children_union_coords[0])\n", " boundary_children_union = geometry.Polygon(boundary_children_union_coords)\n", " # print(boundary_children_union.wkt, \"\\n\")\n", - " \n", + "\n", " # compute the overlapping geometry\n", " # (the intersection of the boundary_parent with boundary_children_union):\n", " overlap_geom = boundary_parent.intersection(boundary_children_union)\n", - " print(\"overlap approx: {}\".format(round(overlap_geom.area / boundary_parent.area, 4))) \n", + " print(\"overlap approx: {}\".format(round(overlap_geom.area / boundary_parent.area, 4)))\n", "\n", " # plot\n", " dict_adjust_textpos = {7: 0.0003, 8: 0.0001, 9: 0.00005, 10: 0.00002}\n", - " \n", + "\n", " for child in h3_cells_children:\n", - " boundary_child_coords = h3.h3_to_geo_boundary(h = child, geo_json = True)\n", + " boundary_child_coords = h3.h3_to_geo_boundary(h=child, geo_json=True)\n", " boundary_child = geometry.Polygon(boundary_child_coords)\n", - " ax.plot(*boundary_child.exterior.coords.xy, color = \"grey\", linestyle=\"--\")\n", - " \n", - " dist_to_centerhex = h3.h3_distance(h1 = centerhex, h2 = child)\n", + " ax.plot(*boundary_child.exterior.coords.xy, color=\"grey\", linestyle=\"--\")\n", + "\n", + " dist_to_centerhex = h3.h3_distance(h1=centerhex, h2=child)\n", " list_distances_to_center.append(dist_to_centerhex)\n", - " \n", + "\n", " if res_children <= res_parent + 3:\n", " # add text\n", - " ax.text(x = boundary_child.centroid.x - dict_adjust_textpos[res_parent],\n", - " y = boundary_child.centroid.y - dict_adjust_textpos[res_parent],\n", - " s = str(dist_to_centerhex),\n", - " fontsize = 12, color = \"black\", weight = \"bold\")\n", - " 
\n", - " ax.plot(*boundary_children_union.exterior.coords.xy, color = \"blue\")\n", - " ax.plot(*boundary_parent.exterior.coords.xy, color = \"red\", linewidth=2)\n", - " \n", + " ax.text(\n", + " x=boundary_child.centroid.x - dict_adjust_textpos[res_parent],\n", + " y=boundary_child.centroid.y - dict_adjust_textpos[res_parent],\n", + " s=str(dist_to_centerhex),\n", + " fontsize=12,\n", + " color=\"black\",\n", + " weight=\"bold\",\n", + " )\n", + "\n", + " ax.plot(*boundary_children_union.exterior.coords.xy, color=\"blue\")\n", + " ax.plot(*boundary_parent.exterior.coords.xy, color=\"red\", linewidth=2)\n", + "\n", " return list_distances_to_center" ] }, @@ -898,19 +869,19 @@ } ], "source": [ - "fig, ax = plt.subplots(2, 2, figsize = (20, 20))\n", - "list_distances_to_center_dc = plot_parent_and_descendents(h3_cell_parent, \n", - " h3_cells_children, \n", - " ax = ax[0][0])\n", - "list_distances_to_center_gc = plot_parent_and_descendents(h3_cell_parent,\n", - " h3_cells_grandchildren,\n", - " ax = ax[0][1])\n", - "list_distances_to_center_2xgc = plot_parent_and_descendents(h3_cell_parent, \n", - " h3_cells_2xgrandchildren, \n", - " ax = ax[1][0])\n", - "list_distances_to_center_3xgc = plot_parent_and_descendents(h3_cell_parent,\n", - " h3_cells_3xgrandchildren,\n", - " ax = ax[1][1])\n", + "fig, ax = plt.subplots(2, 2, figsize=(20, 20))\n", + "list_distances_to_center_dc = plot_parent_and_descendents(\n", + " h3_cell_parent, h3_cells_children, ax=ax[0][0]\n", + ")\n", + "list_distances_to_center_gc = plot_parent_and_descendents(\n", + " h3_cell_parent, h3_cells_grandchildren, ax=ax[0][1]\n", + ")\n", + "list_distances_to_center_2xgc = plot_parent_and_descendents(\n", + " h3_cell_parent, h3_cells_2xgrandchildren, ax=ax[1][0]\n", + ")\n", + "list_distances_to_center_3xgc = plot_parent_and_descendents(\n", + " h3_cell_parent, h3_cells_3xgrandchildren, ax=ax[1][1]\n", + ")\n", "\n", "\n", "ax[0][0].set_title(\"Direct children (res 10)\")\n", @@ -983,18 +954,13 @@ "metadata": {}, "outputs": [], "source": [ - "def explore_ij_coords(lat_point, lon_point, num_rings = 3, ax = None):\n", - "\n", + "def explore_ij_coords(lat_point, lon_point, num_rings=3, ax=None):\n", " # an example at resolution 9\n", - " hex_id_ex = h3.geo_to_h3(lat = lat_point,\n", - " lng = lon_point,\n", - " resolution = 9\n", - " )\n", - " assert(h3.h3_get_resolution(hex_id_ex) == 9)\n", + " hex_id_ex = h3.geo_to_h3(lat=lat_point, lng=lon_point, resolution=9)\n", + " assert h3.h3_get_resolution(hex_id_ex) == 9\n", "\n", " # get its rings\n", - " list_siblings = list(h3.hex_range_distances(h = hex_id_ex, \n", - " K = num_rings))\n", + " list_siblings = list(h3.hex_range_distances(h=hex_id_ex, K=num_rings))\n", "\n", " dict_ij = {}\n", " dict_color = {}\n", @@ -1002,10 +968,9 @@ "\n", " if ax is None:\n", " figsize = (min(6 * num_rings, 15), min(6 * num_rings, 15))\n", - " fig, ax = plt.subplots(1, 1, figsize = figsize)\n", + " fig, ax = plt.subplots(1, 1, figsize=figsize)\n", "\n", " for ring_level in range(len(list_siblings)):\n", - "\n", " if ring_level == 0:\n", " fontcol = \"red\"\n", " elif ring_level == 1:\n", @@ -1017,52 +982,56 @@ "\n", " if ring_level == 0:\n", " # on ring 0 is only hex_id_ex\n", - " geom_boundary_coords = h3.h3_to_geo_boundary(hex_id_ex,\n", - " geo_json = True)\n", + " geom_boundary_coords = h3.h3_to_geo_boundary(hex_id_ex, geo_json=True)\n", " geom_shp = geometry.Polygon(geom_boundary_coords)\n", - " ax.plot(*geom_shp.exterior.xy, color = \"purple\")\n", + " ax.plot(*geom_shp.exterior.xy, 
color=\"purple\")\n", "\n", - " ij_ex = h3.experimental_h3_to_local_ij(origin = hex_id_ex,\n", - " h = hex_id_ex)\n", + " ij_ex = h3.experimental_h3_to_local_ij(origin=hex_id_ex, h=hex_id_ex)\n", " s = \" {} \\n \\n (0,0)\".format(ij_ex)\n", "\n", " dict_ij[hex_id_ex] = ij_ex\n", " dict_color[hex_id_ex] = \"red\"\n", - " dict_s[hex_id_ex] = s \n", + " dict_s[hex_id_ex] = s\n", "\n", - " ax.text(x = geom_shp.centroid.x - 0.0017,\n", - " y = geom_shp.centroid.y - 0.0005,\n", - " s = s,\n", - " fontsize = 11, color = fontcol, weight = \"bold\")\n", + " ax.text(\n", + " x=geom_shp.centroid.x - 0.0017,\n", + " y=geom_shp.centroid.y - 0.0005,\n", + " s=s,\n", + " fontsize=11,\n", + " color=fontcol,\n", + " weight=\"bold\",\n", + " )\n", " else:\n", " # get the hex ids resident on ring_level\n", " siblings_on_ring = list(list_siblings[ring_level])\n", "\n", " k = 1\n", " for sibling_hex in sorted(siblings_on_ring):\n", - " geom_boundary_coords = h3.h3_to_geo_boundary(sibling_hex,\n", - " geo_json=True)\n", + " geom_boundary_coords = h3.h3_to_geo_boundary(sibling_hex, geo_json=True)\n", " geom_shp = geometry.Polygon(geom_boundary_coords)\n", - " ax.plot(*geom_shp.exterior.xy, color = \"purple\")\n", + " ax.plot(*geom_shp.exterior.xy, color=\"purple\")\n", "\n", - " ij = h3.experimental_h3_to_local_ij(origin = hex_id_ex,\n", - " h = sibling_hex)\n", + " ij = h3.experimental_h3_to_local_ij(origin=hex_id_ex, h=sibling_hex)\n", " ij_diff = (ij[0] - ij_ex[0], ij[1] - ij_ex[1])\n", " s = \" {} \\n \\n {}\".format(ij, ij_diff)\n", " k = k + 1\n", "\n", - " dict_ij[sibling_hex] = ij \n", + " dict_ij[sibling_hex] = ij\n", " dict_color[sibling_hex] = fontcol\n", " dict_s[sibling_hex] = s\n", "\n", - " ax.text(x = geom_shp.centroid.x - 0.0017,\n", - " y = geom_shp.centroid.y - 0.0005,\n", - " s = s,\n", - " fontsize = 11, color = fontcol, weight = \"bold\")\n", + " ax.text(\n", + " x=geom_shp.centroid.x - 0.0017,\n", + " y=geom_shp.centroid.y - 0.0005,\n", + " s=s,\n", + " fontsize=11,\n", + " color=fontcol,\n", + " weight=\"bold\",\n", + " )\n", "\n", " ax.set_ylabel(\"Latitude\")\n", " ax.set_xlabel(\"Longitude\")\n", - " \n", + "\n", " return dict_ij, dict_color, dict_s" ] }, @@ -1085,8 +1054,9 @@ } ], "source": [ - "dict_ij, dict_color, dict_s = explore_ij_coords(lat_point = lat_centr_point,\n", - " lon_point = lon_centr_point)" + "dict_ij, dict_color, dict_s = explore_ij_coords(\n", + " lat_point=lat_centr_point, lon_point=lon_centr_point\n", + ")" ] }, { @@ -1188,10 +1158,10 @@ ], "source": [ "# get geometry for india\n", - "indonesia_loc = GeolocateAddress(query='Indonesia')\n", + "indonesia_loc = GeolocateAddress(query=\"Indonesia\")\n", "\n", - "#generate gdf for india with polygon geometry\n", - "gdf_indonesia = gpd.GeoDataFrame.from_features(indonesia_loc.polygon_json, crs='epsg:4326')\n", + "# generate gdf for india with polygon geometry\n", + "gdf_indonesia = gpd.GeoDataFrame.from_features(indonesia_loc.polygon_json, crs=\"epsg:4326\")\n", "gdf_indonesia" ] }, @@ -1267,17 +1237,17 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "def base_empty_map():\n", " \"\"\"Prepares a folium map centered in a central GPS point of Toulouse\"\"\"\n", - " m = Map(location = [-0.25 , 112.43],\n", - " zoom_start = 9.5,\n", - " tiles = \"cartodbpositron\",\n", - " attr = '''© \n", + " m = Map(\n", + " location=[-0.25, 112.43],\n", + " zoom_start=9.5,\n", + " tiles=\"cartodbpositron\",\n", + " attr=\"\"\"© \n", " OpenStreetMapcontributors ©\n", " \n", - " CartoDB'''\n", - " )\n", + " CartoDB\"\"\",\n", + " 
)\n", " return m" ] }, @@ -1816,7 +1786,7 @@ } ], "source": [ - "raw_data = gpd.read_file('../../datasets/processed/located_lg_data_point_v2.shp')\n", + "raw_data = gpd.read_file(\"../../datasets/processed/located_lg_data_point_v2.shp\")\n", "raw_data" ] }, @@ -1826,11 +1796,11 @@ "metadata": {}, "outputs": [], "source": [ - "## add the \n", - "long_list = [raw_data.iloc[i]['geometry'].x for i in range(0, len(raw_data))]\n", - "lat_list = [raw_data.iloc[i]['geometry'].y for i in range(0, len(raw_data))]\n", - "raw_data['Latitude']=lat_list\n", - "raw_data['Longitude']=long_list" + "## add the\n", + "long_list = [raw_data.iloc[i][\"geometry\"].x for i in range(0, len(raw_data))]\n", + "lat_list = [raw_data.iloc[i][\"geometry\"].y for i in range(0, len(raw_data))]\n", + "raw_data[\"Latitude\"] = lat_list\n", + "raw_data[\"Longitude\"] = long_list" ] }, { @@ -1859,9 +1829,9 @@ "mc = MarkerCluster()\n", "\n", "for i, row in raw_data.iterrows():\n", - " mk = Marker(location = [row[\"Latitude\"], row[\"Longitude\"]])\n", + " mk = Marker(location=[row[\"Latitude\"], row[\"Longitude\"]])\n", " mk.add_to(mc)\n", - " \n", + "\n", "mc.add_to(m)\n", "m" ] @@ -1966,8 +1936,10 @@ } ], "source": [ - "gdf_raw_cpy = raw_data.reset_index(inplace = False, drop = False)\n", - "df = gdf_raw_cpy.groupby(by=['Longitude', 'Latitude']).agg({'Material':'first', 'Volume':sum, 'Country': 'first'})\n", + "gdf_raw_cpy = raw_data.reset_index(inplace=False, drop=False)\n", + "df = gdf_raw_cpy.groupby(by=[\"Longitude\", \"Latitude\"]).agg(\n", + " {\"Material\": \"first\", \"Volume\": sum, \"Country\": \"first\"}\n", + ")\n", "df.reset_index(inplace=True, drop=False)\n", "df.head()" ] @@ -2002,18 +1974,11 @@ " msg_ = \"At resolution {} --> H3 cell id : {} and its geometry: {} \"\n", " print(msg_.format(res, col_hex_id, col_geom))\n", " df[col_hex_id] = df.apply(\n", - " lambda row: h3.geo_to_h3(\n", - " lat = row[\"Latitude\"],\n", - " lng = row[\"Longitude\"],\n", - " resolution = res),\n", - " axis = 1)\n", + " lambda row: h3.geo_to_h3(lat=row[\"Latitude\"], lng=row[\"Longitude\"], resolution=res), axis=1\n", + " )\n", " # use h3.h3_to_geo_boundary to obtain the geometries of these hexagons\n", " df[col_geom] = df[col_hex_id].apply(\n", - " lambda x: {\"type\": \"Polygon\",\n", - " \"coordinates\":\n", - " [h3.h3_to_geo_boundary(\n", - " h=x, geo_json=True)]\n", - " }\n", + " lambda x: {\"type\": \"Polygon\", \"coordinates\": [h3.h3_to_geo_boundary(h=x, geo_json=True)]}\n", " )\n", " df.head().T" ] @@ -2414,7 +2379,7 @@ } ], "source": [ - "ind_gdf = gpd.read_file('../../datasets/raw/ind_geo.json')\n", + "ind_gdf = gpd.read_file(\"../../datasets/raw/ind_geo.json\")\n", "ind_gdf.head()" ] }, @@ -2502,22 +2467,23 @@ "source": [ "## multipolygon to polygon\n", "import geopandas as gpd\n", - "from shapely.geometry.polygon import Polygon\n", "from shapely.geometry.multipolygon import MultiPolygon\n", + "from shapely.geometry.polygon import Polygon\n", + "\n", "\n", "def explode(indf):\n", - " #indf = gpd.GeoDataFrame.from_file(indata)\n", + " # indf = gpd.GeoDataFrame.from_file(indata)\n", " outdf = gpd.GeoDataFrame(columns=indf.columns)\n", " for idx, row in indf.iterrows():\n", " if type(row.geometry) == Polygon:\n", - " outdf = outdf.append(row,ignore_index=True)\n", + " outdf = outdf.append(row, ignore_index=True)\n", " if type(row.geometry) == MultiPolygon:\n", " multdf = gpd.GeoDataFrame(columns=indf.columns)\n", " recs = len(row.geometry)\n", - " multdf = multdf.append([row]*recs,ignore_index=True)\n", + " multdf = 
multdf.append([row] * recs, ignore_index=True)\n", " for geom in range(recs):\n", - " multdf.loc[geom,'geometry'] = row.geometry[geom]\n", - " outdf = outdf.append(multdf,ignore_index=True)\n", + " multdf.loc[geom, \"geometry\"] = row.geometry[geom]\n", + " outdf = outdf.append(multdf, ignore_index=True)\n", " return outdf" ] }, @@ -2667,8 +2633,8 @@ "metadata": {}, "outputs": [], "source": [ - "#save polygon file as geojson\n", - "ind_gdf_test.to_file('../../datasets/raw/ind_geo_test.json', driver='GeoJSON')" + "# save polygon file as geojson\n", + "ind_gdf_test.to_file(\"../../datasets/raw/ind_geo_test.json\", driver=\"GeoJSON\")" ] }, { @@ -2681,17 +2647,15 @@ " \"\"\"Loads a geojson files of polygon geometries and features,\n", " swaps the latitude and longitude andstores geojson\"\"\"\n", " gdf = gpd.read_file(filepath, driver=\"GeoJSON\")\n", - " \n", - " gdf[\"geom_geojson\"] = gdf[\"geometry\"].apply(\n", - " lambda x: geometry.mapping(x))\n", + "\n", + " gdf[\"geom_geojson\"] = gdf[\"geometry\"].apply(lambda x: geometry.mapping(x))\n", "\n", " gdf[\"geom_swap\"] = gdf[\"geometry\"].map(\n", - " lambda polygon: ops.transform(\n", - " lambda x, y: (y, x), polygon))\n", + " lambda polygon: ops.transform(lambda x, y: (y, x), polygon)\n", + " )\n", + "\n", + " gdf[\"geom_swap_geojson\"] = gdf[\"geom_swap\"].apply(lambda x: geometry.mapping(x))\n", "\n", - " gdf[\"geom_swap_geojson\"] = gdf[\"geom_swap\"].apply(\n", - " lambda x: geometry.mapping(x))\n", - " \n", " return gdf" ] }, @@ -2870,7 +2834,7 @@ } ], "source": [ - "ind_gdf_swap = load_and_prepare_districts(filepath = '../../datasets/raw/ind_geo_test.json')\n", + "ind_gdf_swap = load_and_prepare_districts(filepath=\"../../datasets/raw/ind_geo_test.json\")\n", "ind_gdf_swap.head()" ] }, @@ -2955,29 +2919,25 @@ "metadata": {}, "outputs": [], "source": [ - "def fill_hexagons(geom_geojson, res, flag_swap = False, flag_return_df = False):\n", + "def fill_hexagons(geom_geojson, res, flag_swap=False, flag_return_df=False):\n", " \"\"\"Fills a geometry given in geojson format with H3 hexagons at specified\n", " resolution. 
The flag_reverse_geojson allows to specify whether the geometry\n", " is lon/lat or swapped\"\"\"\n", "\n", - " set_hexagons = h3.polyfill(geojson = geom_geojson,\n", - " res = res,\n", - " geo_json_conformant = flag_swap)\n", + " set_hexagons = h3.polyfill(geojson=geom_geojson, res=res, geo_json_conformant=flag_swap)\n", " list_hexagons_filling = list(set_hexagons)\n", "\n", " if flag_return_df is True:\n", " # make dataframe\n", " df_fill_hex = pd.DataFrame({\"hex_id\": list_hexagons_filling})\n", " df_fill_hex[\"value\"] = 0\n", - " df_fill_hex['geometry'] = df_fill_hex.hex_id.apply(\n", - " lambda x:\n", - " {\"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " h3.h3_to_geo_boundary(h=x,\n", - " geo_json=True)\n", - " ]\n", - " })\n", - " assert(df_fill_hex.shape[0] == len(list_hexagons_filling))\n", + " df_fill_hex[\"geometry\"] = df_fill_hex.hex_id.apply(\n", + " lambda x: {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [h3.h3_to_geo_boundary(h=x, geo_json=True)],\n", + " }\n", + " )\n", + " assert df_fill_hex.shape[0] == len(list_hexagons_filling)\n", " return df_fill_hex\n", " else:\n", " return list_hexagons_filling" @@ -3243,8 +3203,9 @@ } ], "source": [ - "\n", - "ind_gdf_swap[\"hex_fill_initial\"] = ind_gdf_swap[\"geom_swap_geojson\"].apply(lambda x: list(fill_hexagons(geom_geojson = x, res = 13)))\n", + "ind_gdf_swap[\"hex_fill_initial\"] = ind_gdf_swap[\"geom_swap_geojson\"].apply(\n", + " lambda x: list(fill_hexagons(geom_geojson=x, res=13))\n", + ")\n", "ind_gdf_swap[\"num_hex_fill_initial\"] = ind_gdf_swap[\"hex_fill_initial\"].apply(len)\n", "\n", "total_num_hex_initial = ind_gdf_swap[\"num_hex_fill_initial\"].sum()\n", @@ -3298,7 +3259,7 @@ "metadata": {}, "outputs": [], "source": [ - "test_gdf = ind_gdf_swap[:15]\n" + "test_gdf = ind_gdf_swap[:15]" ] }, { @@ -3406,15 +3367,21 @@ "test_gdf[\"hex_fill_compact\"] = test_gdf[\"hex_fill_initial\"].apply(lambda x: list(h3.compact(x)))\n", "test_gdf[\"num_hex_fill_compact\"] = test_gdf[\"hex_fill_compact\"].apply(len)\n", "\n", - "print(\"Reduced number of cells from {} to {} \\n\".format(\n", - " test_gdf[\"num_hex_fill_initial\"].sum(),\n", - " test_gdf[\"num_hex_fill_compact\"].sum()))\n", + "print(\n", + " \"Reduced number of cells from {} to {} \\n\".format(\n", + " test_gdf[\"num_hex_fill_initial\"].sum(), test_gdf[\"num_hex_fill_compact\"].sum()\n", + " )\n", + ")\n", "\n", "# count cells by index resolution after compacting\n", "\n", - "test_gdf[\"hex_resolutions\"] = test_gdf[\"hex_fill_compact\"].apply(lambda x: [h3.h3_get_resolution(hexid) for hexid in x])\n", + "test_gdf[\"hex_resolutions\"] = test_gdf[\"hex_fill_compact\"].apply(\n", + " lambda x: [h3.h3_get_resolution(hexid) for hexid in x]\n", + ")\n", "test_gdf[\"hex_resolutions_counts\"] = test_gdf[\"hex_resolutions\"].apply(lambda x: Counter(x))\n", - "test_gdf[[\"geometry\", \"num_hex_fill_initial\", \"num_hex_fill_compact\", \"hex_resolutions_counts\"]].head()" + "test_gdf[\n", + " [\"geometry\", \"num_hex_fill_initial\", \"num_hex_fill_compact\", \"hex_resolutions_counts\"]\n", + "].head()" ] }, { @@ -3423,7 +3390,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# this column of empty lists is a placeholder, will be used further in this section\n", "test_gdf[\"compacted_novoids\"] = [[] for _ in range(test_gdf.shape[0])]" ] @@ -3434,50 +3400,51 @@ "metadata": {}, "outputs": [], "source": [ - "def plot_basemap_region_fill(df_boundaries_zones, initial_map = None):\n", - " \n", + "def plot_basemap_region_fill(df_boundaries_zones, 
initial_map=None):\n", " \"\"\"On a folium map, add the boundaries of the geometries in geojson formatted\n", - " column of df_boundaries_zones\"\"\"\n", + " column of df_boundaries_zones\"\"\"\n", "\n", " if initial_map is None:\n", " initial_map = base_empty_map()\n", "\n", - " feature_group = folium.FeatureGroup(name='Boundaries')\n", + " feature_group = folium.FeatureGroup(name=\"Boundaries\")\n", "\n", " for i, row in df_boundaries_zones.iterrows():\n", - " feature_sel = Feature(geometry = row[\"geom_geojson\"], id=str(i))\n", + " feature_sel = Feature(geometry=row[\"geom_geojson\"], id=str(i))\n", " feat_collection_sel = FeatureCollection([feature_sel])\n", " geojson_subzone = json.dumps(feat_collection_sel)\n", "\n", " GeoJson(\n", - " geojson_subzone,\n", - " style_function=lambda feature: {\n", - " 'fillColor': None,\n", - " 'color': 'blue',\n", - " 'weight': 5,\n", - " 'fillOpacity': 0\n", - " }\n", - " ).add_to(feature_group)\n", + " geojson_subzone,\n", + " style_function=lambda feature: {\n", + " \"fillColor\": None,\n", + " \"color\": \"blue\",\n", + " \"weight\": 5,\n", + " \"fillOpacity\": 0,\n", + " },\n", + " ).add_to(feature_group)\n", "\n", " feature_group.add_to(initial_map)\n", " return initial_map\n", "\n", - "# ---------------------------------------------------------------------------\n", "\n", + "# ---------------------------------------------------------------------------\n", "\n", - "def hexagons_dataframe_to_geojson(df_hex, hex_id_field,\n", - " geometry_field, value_field,\n", - " file_output = None):\n", "\n", + "def hexagons_dataframe_to_geojson(\n", + " df_hex, hex_id_field, geometry_field, value_field, file_output=None\n", + "):\n", " \"\"\"Produce the GeoJSON representation containing all geometries in a dataframe\n", - " based on a column in geojson format (geometry_field)\"\"\"\n", + " based on a column in geojson format (geometry_field)\"\"\"\n", "\n", " list_features = []\n", "\n", " for i, row in df_hex.iterrows():\n", - " feature = Feature(geometry = row[geometry_field],\n", - " id = row[hex_id_field],\n", - " properties = {\"value\": row[value_field]})\n", + " feature = Feature(\n", + " geometry=row[geometry_field],\n", + " id=row[hex_id_field],\n", + " properties={\"value\": row[value_field]},\n", + " )\n", " list_features.append(feature)\n", "\n", " feat_collection = FeatureCollection(list_features)\n", @@ -3491,31 +3458,31 @@ "\n", " return geojson_result\n", "\n", + "\n", "# ---------------------------------------------------------------------------------\n", "\n", "\n", - "def map_addlayer_filling(df_fill_hex, layer_name, map_initial, fillcolor = None):\n", - " \"\"\" On a folium map (likely created with plot_basemap_region_fill),\n", - " add a layer of hexagons that filled the geometry at given H3 resolution\n", - " (df_fill_hex returned by fill_hexagons method)\"\"\"\n", + "def map_addlayer_filling(df_fill_hex, layer_name, map_initial, fillcolor=None):\n", + " \"\"\"On a folium map (likely created with plot_basemap_region_fill),\n", + " add a layer of hexagons that filled the geometry at given H3 resolution\n", + " (df_fill_hex returned by fill_hexagons method)\"\"\"\n", "\n", - " geojson_hx = hexagons_dataframe_to_geojson(df_fill_hex,\n", - " hex_id_field = \"hex_id\",\n", - " value_field = \"value\",\n", - " geometry_field = \"geometry\")\n", + " geojson_hx = hexagons_dataframe_to_geojson(\n", + " df_fill_hex, hex_id_field=\"hex_id\", value_field=\"value\", geometry_field=\"geometry\"\n", + " )\n", "\n", " GeoJson(\n", - " geojson_hx,\n", 
- " style_function=lambda feature: {\n", - " 'fillColor': fillcolor,\n", - " 'color': 'red',\n", - " 'weight': 2,\n", - " 'fillOpacity': 0.1\n", - " },\n", - " name = layer_name\n", - " ).add_to(map_initial)\n", + " geojson_hx,\n", + " style_function=lambda feature: {\n", + " \"fillColor\": fillcolor,\n", + " \"color\": \"red\",\n", + " \"weight\": 2,\n", + " \"fillOpacity\": 0.1,\n", + " },\n", + " name=layer_name,\n", + " ).add_to(map_initial)\n", "\n", - " return map_initial\n" + " return map_initial" ] }, { @@ -3524,37 +3491,36 @@ "metadata": {}, "outputs": [], "source": [ - "def visualize_filled_compact(gdf,fillcolor=None):\n", + "def visualize_filled_compact(gdf, fillcolor=None):\n", " overall_map = base_empty_map()\n", - " map_ = plot_basemap_region_fill(gdf, initial_map =overall_map)\n", - " \n", + " map_ = plot_basemap_region_fill(gdf, initial_map=overall_map)\n", + "\n", " for i, row in gdf.iterrows():\n", - " \n", - " if len(row['compacted_novoids']) > 0:\n", + " if len(row[\"compacted_novoids\"]) > 0:\n", " list_hexagons_filling_compact = row[\"compacted_novoids\"]\n", " else:\n", " list_hexagons_filling_compact = []\n", - " \n", + "\n", " list_hexagons_filling_compact.extend(row[\"hex_fill_compact\"])\n", " list_hexagons_filling_compact = list(set(list_hexagons_filling_compact))\n", - " \n", + "\n", " # make dataframes\n", " df_fill_compact = pd.DataFrame({\"hex_id\": list_hexagons_filling_compact})\n", " df_fill_compact[\"value\"] = 0\n", - " df_fill_compact['geometry'] = df_fill_compact.hex_id.apply(\n", - " lambda x: \n", - " {\"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " h3.h3_to_geo_boundary(h=x,\n", - " geo_json=True)\n", - " ]\n", - " })\n", + " df_fill_compact[\"geometry\"] = df_fill_compact.hex_id.apply(\n", + " lambda x: {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [h3.h3_to_geo_boundary(h=x, geo_json=True)],\n", + " }\n", + " )\n", "\n", - " map_fill_compact = map_addlayer_filling(df_fill_hex = df_fill_compact, \n", - " layer_name = 'test_ind',\n", - " map_initial = map_,\n", - " fillcolor = fillcolor)\n", - " folium.map.LayerControl('bottomright', collapsed=True).add_to(map_fill_compact)\n", + " map_fill_compact = map_addlayer_filling(\n", + " df_fill_hex=df_fill_compact,\n", + " layer_name=\"test_ind\",\n", + " map_initial=map_,\n", + " fillcolor=fillcolor,\n", + " )\n", + " folium.map.LayerControl(\"bottomright\", collapsed=True).add_to(map_fill_compact)\n", "\n", " return map_fill_compact" ] @@ -3592,7 +3558,7 @@ } ], "source": [ - "visualize_filled_compact(gdf = test_gdf)" + "visualize_filled_compact(gdf=test_gdf)" ] }, { @@ -3613,29 +3579,24 @@ "outputs": [], "source": [ "def generate_filled_compact(gdf):\n", - " \n", " for i, row in gdf.iterrows():\n", - " \n", - " if len(row['compacted_novoids']) > 0:\n", + " if len(row[\"compacted_novoids\"]) > 0:\n", " list_hexagons_filling_compact = row[\"compacted_novoids\"]\n", " else:\n", " list_hexagons_filling_compact = []\n", - " \n", + "\n", " list_hexagons_filling_compact.extend(row[\"hex_fill_compact\"])\n", " list_hexagons_filling_compact = list(set(list_hexagons_filling_compact))\n", - " \n", + "\n", " # make dataframes\n", " df_fill_compact = pd.DataFrame({\"hex_id\": list_hexagons_filling_compact})\n", " df_fill_compact[\"value\"] = 0\n", - " df_fill_compact['geometry'] = df_fill_compact.hex_id.apply(\n", - " lambda x: \n", - " {\"type\": \"Polygon\",\n", - " \"coordinates\": [\n", - " h3.h3_to_geo_boundary(h=x,\n", - " geo_json=True)\n", - " ]\n", - " })\n", - "\n", + " 
df_fill_compact[\"geometry\"] = df_fill_compact.hex_id.apply(\n", + " lambda x: {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [h3.h3_to_geo_boundary(h=x, geo_json=True)],\n", + " }\n", + " )\n", "\n", " return df_fill_compact" ] @@ -3731,7 +3692,7 @@ "metadata": {}, "outputs": [], "source": [ - "to_save_gdf.to_json('test_hex_export.json')" + "to_save_gdf.to_json(\"test_hex_export.json\")" ] }, { @@ -3760,9 +3721,9 @@ "metadata": {}, "outputs": [], "source": [ - "from shapely.geometry import mapping, shape\n", - "import json \n", + "import json\n", "\n", + "from shapely.geometry import shape\n", "\n", "to_save_gdf[\"the_geom\"] = to_save_gdf[\"geometry\"].apply(lambda x: shape(x))" ] @@ -4028,7 +3989,7 @@ } ], "source": [ - "to_save_gdf.set_geometry('the_geom')" + "to_save_gdf.set_geometry(\"the_geom\")" ] }, { @@ -4204,7 +4165,7 @@ } ], "source": [ - "test_2 = gpd.GeoDataFrame(gdf_[['hex_id', 'the_geom']])\n", + "test_2 = gpd.GeoDataFrame(gdf_[[\"hex_id\", \"the_geom\"]])\n", "test_2.head()" ] }, @@ -4322,7 +4283,7 @@ } ], "source": [ - "test_2.set_geometry('the_geom')\n" + "test_2.set_geometry(\"the_geom\")" ] }, { @@ -4331,7 +4292,7 @@ "metadata": {}, "outputs": [], "source": [ - "test_2['geometry']=test_2['the_geom']" + "test_2[\"geometry\"] = test_2[\"the_geom\"]" ] }, { @@ -4363,7 +4324,7 @@ } ], "source": [ - "test_2[['hex_id', 'geometry']].plot()" + "test_2[[\"hex_id\", \"geometry\"]].plot()" ] }, { @@ -4478,7 +4439,7 @@ } ], "source": [ - "cv = gpd.read_file('../../datasets/raw/cvalenciana.shp')\n", + "cv = gpd.read_file(\"../../datasets/raw/cvalenciana.shp\")\n", "cv.head()" ] }, @@ -4569,7 +4530,7 @@ } ], "source": [ - "cv_test = gpd.read_file('../../datasets/raw/cvalenciana_test.shp')\n", + "cv_test = gpd.read_file(\"../../datasets/raw/cvalenciana_test.shp\")\n", "cv_test.head()" ] }, @@ -4579,9 +4540,8 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "# Create an empty dataframe to write data into\n", - "h3_df = pd.DataFrame([],columns=['country','city','h3_id','h3_geo_boundary','h3_centroid'])" + "h3_df = pd.DataFrame([], columns=[\"country\", \"city\", \"h3_id\", \"h3_geo_boundary\", \"h3_centroid\"])" ] }, { @@ -4590,7 +4550,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "import shapely" ] }, @@ -4600,7 +4559,7 @@ "metadata": {}, "outputs": [], "source": [ - "district_polygon = list(cv.iloc[0]['geometry']) " + "district_polygon = list(cv.iloc[0][\"geometry\"])" ] }, { @@ -4609,7 +4568,7 @@ "metadata": {}, "outputs": [], "source": [ - "polygon_geojson = gpd.GeoSeries(cv.iloc[0]['geometry']).__geo_interface__" + "polygon_geojson = gpd.GeoSeries(cv.iloc[0][\"geometry\"]).__geo_interface__" ] }, { @@ -4627,7 +4586,7 @@ "metadata": {}, "outputs": [], "source": [ - "poly_geojson = polygon_geojson['features'][0]['geometry']" + "poly_geojson = polygon_geojson[\"features\"][0][\"geometry\"]" ] }, { @@ -5666,19 +5625,13 @@ "outputs": [], "source": [ "# Create an empty dataframe to write data into\n", - "h3_df = pd.DataFrame([],columns=['h3_id','h3_geo_boundary','h3_centroid'])\n", + "h3_df = pd.DataFrame([], columns=[\"h3_id\", \"h3_geo_boundary\", \"h3_centroid\"])\n", "for h3_hex in h3_hexes:\n", - " h3_geo_boundary = shapely.geometry.Polygon(\n", - " h3.h3_to_geo_boundary(h3_hex,geo_json=True)\n", - " )\n", - " \n", + " h3_geo_boundary = shapely.geometry.Polygon(h3.h3_to_geo_boundary(h3_hex, geo_json=True))\n", + "\n", " h3_centroid = h3.h3_to_geo(h3_hex)\n", " # Append results to dataframe\n", - " h3_df.loc[len(h3_df)]=[\n", - " h3_hex,\n", - " 
h3_geo_boundary,\n", - " h3_centroid\n", - " ]" + " h3_df.loc[len(h3_df)] = [h3_hex, h3_geo_boundary, h3_centroid]" ] }, { @@ -5687,7 +5640,7 @@ "metadata": {}, "outputs": [], "source": [ - "h3_df.to_csv('../../datasets/processed/hex3_test_v4_8res.csv', index=False)" + "h3_df.to_csv(\"../../datasets/processed/hex3_test_v4_8res.csv\", index=False)" ] }, { @@ -5706,19 +5659,13 @@ "outputs": [], "source": [ "# Create an empty dataframe to write data into\n", - "h3_df_test = pd.DataFrame([],columns=['h3_id','h3_geo_boundary','h3_centroid'])\n", + "h3_df_test = pd.DataFrame([], columns=[\"h3_id\", \"h3_geo_boundary\", \"h3_centroid\"])\n", "for h3_hex in test_h3:\n", - " h3_geo_boundary = shapely.geometry.Polygon(\n", - " h3.h3_to_geo_boundary(h3_hex,geo_json=True)\n", - " )\n", - " \n", + " h3_geo_boundary = shapely.geometry.Polygon(h3.h3_to_geo_boundary(h3_hex, geo_json=True))\n", + "\n", " h3_centroid = h3.h3_to_geo(h3_hex)\n", " # Append results to dataframe\n", - " h3_df_test.loc[len(h3_df)]=[\n", - " h3_hex,\n", - " h3_geo_boundary,\n", - " h3_centroid\n", - " ]" + " h3_df_test.loc[len(h3_df)] = [h3_hex, h3_geo_boundary, h3_centroid]" ] }, { @@ -5727,7 +5674,7 @@ "metadata": {}, "outputs": [], "source": [ - "h3_df_test.to_csv('../../datasets/processed/hex3_test_v2.csv', index=False)" + "h3_df_test.to_csv(\"../../datasets/processed/hex3_test_v2.csv\", index=False)" ] }, { @@ -5736,34 +5683,26 @@ "metadata": {}, "outputs": [], "source": [ - "# Iterate over every row of the geo dataframe \n", + "# Iterate over every row of the geo dataframe\n", "for i, row in cv.iterrows():\n", " # Parse out info from columns of row\n", " country = row.NAME_0\n", " city = row.NAME_1\n", " district_multipolygon = row.geometry\n", " # Convert multi-polygon into list of polygons\n", - " district_polygon = list(district_multipolygon) \n", + " district_polygon = list(district_multipolygon)\n", " for polygon in district_polygon:\n", " # Convert Polygon to GeoJSON dictionary\n", " poly_geojson = gpd.GeoSeries([polygon]).__geo_interface__\n", " # Parse out geometry key from GeoJSON dictionary\n", - " poly_geojson = poly_geojson['features'][0]['geometry'] \n", + " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n", " # Fill the dictionary with Resolution 10 H3 Hexagons\n", - " h3_hexes = h3.polyfill_geojson(poly_geojson, 10) \n", + " h3_hexes = h3.polyfill_geojson(poly_geojson, 10)\n", " for h3_hex in h3_hexes:\n", - " h3_geo_boundary = shapely.geometry.Polygon(\n", - " h3.h3_to_geo_boundary(h3_hex,geo_json=True)\n", - " )\n", + " h3_geo_boundary = shapely.geometry.Polygon(h3.h3_to_geo_boundary(h3_hex, geo_json=True))\n", " h3_centroid = h3.h3_to_geo(h3_hex)\n", " # Append results to dataframe\n", - " h3_df.loc[len(h3_df)]=[\n", - " country,\n", - " city,\n", - " h3_hex,\n", - " h3_geo_boundary,\n", - " h3_centroid\n", - " ]" + " h3_df.loc[len(h3_df)] = [country, city, h3_hex, h3_geo_boundary, h3_centroid]" ] }, { diff --git a/data/notebooks/Lab/0_4_1_H3_calculations.ipynb b/data/notebooks/Lab/0_4_1_H3_calculations.ipynb index 97f80e973..9c7c29e6c 100644 --- a/data/notebooks/Lab/0_4_1_H3_calculations.ipynb +++ b/data/notebooks/Lab/0_4_1_H3_calculations.ipynb @@ -375,31 +375,27 @@ } ], "source": [ - "# import libraries\n", - "import h3\n", - "\n", - "import pandas as pd\n", - "import geopandas as gpd\n", "import json\n", "import time\n", "\n", - "from shapely.geometry import shape, Polygon, Point\n", - "\n", + "import geopandas as gpd\n", "\n", + "# import libraries\n", + "import h3\n", + "import pandas as 
pd\n", + "import pandas_bokeh\n", "from rasterstats import gen_zonal_stats\n", + "from shapely.geometry import Point, shape\n", "\n", - "\n", - "import pandas_bokeh\n", "pandas_bokeh.output_notebook()\n", "\n", + "\n", "import numpy as np\n", "import scipy.special\n", - "\n", "from bokeh.layouts import gridplot\n", - "from bokeh.plotting import figure, output_file, show\n", "from bokeh.models import ColumnDataSource\n", - "from datetime import datetime\n", - "from bokeh.palettes import Spectral10" + "from bokeh.palettes import Spectral10\n", + "from bokeh.plotting import figure, output_file, show" ] }, { @@ -429,25 +425,27 @@ "outputs": [], "source": [ "# define function to covert geoms to h3\n", + "\n", + "\n", "def generate_h3_features(geometry, res):\n", " \"\"\"\n", " Generate h3 for geometry\n", - " \n", + "\n", " Input\n", " ------\n", " geometry: shapely.polygon or shapely.multipolygon\n", - " \n", + "\n", " Output\n", " ------\n", " gdf with H3_hexes\n", " \"\"\"\n", " # Create an empty dataframe to write data into\n", - " h3_df = pd.DataFrame([],columns=['h3_id'])\n", - " if geometry.geom_type == 'MultiPolygon':\n", + " pd.DataFrame([], columns=[\"h3_id\"])\n", + " if geometry.geom_type == \"MultiPolygon\":\n", " district_polygon = list(geometry)\n", " for polygon in district_polygon:\n", " poly_geojson = gpd.GeoSeries([polygon]).__geo_interface__\n", - " poly_geojson = poly_geojson['features'][0]['geometry'] \n", + " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n", " h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n", " for h3_hex in h3_hexes:\n", " coords = h3.h3_set_to_multi_polygon([h3_hex], geo_json=True)\n", @@ -456,9 +454,9 @@ " \"properties\": {\"hexid\": h3_hex},\n", " \"geometry\": {\"type\": \"Polygon\", \"coordinates\": coords[0]},\n", " }\n", - " elif geometry.geom_type == 'Polygon':\n", + " elif geometry.geom_type == \"Polygon\":\n", " poly_geojson = gpd.GeoSeries(geometry).__geo_interface__\n", - " poly_geojson = poly_geojson['features'][0]['geometry']\n", + " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n", " h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n", " for h3_hex in h3_hexes:\n", " coords = h3.h3_set_to_multi_polygon([h3_hex], geo_json=True)\n", @@ -468,14 +466,13 @@ " \"geometry\": {\"type\": \"Polygon\", \"coordinates\": coords[0]},\n", " }\n", " else:\n", - " print('Shape is not a polygon or multypolygon.')\n", - " \n", + " print(\"Shape is not a polygon or multypolygon.\")\n", + "\n", "\n", - " \n", "def get_h3_array(geom, raster_path, res, stats, prefix):\n", " \"\"\"\n", " Function that trasnlate a raster into h3\n", - " \n", + "\n", " Input\n", " ------\n", " geom - geometry used for filling with h3\n", @@ -483,13 +480,13 @@ " res - resolution of the h3 level\n", " stats - stats used in the summary stats\n", " prefix - for output in the summary stats column\n", - " \n", + "\n", " Output\n", " ------\n", " array - temporal array with hex id and stats info\n", " \"\"\"\n", " h3_features = generate_h3_features(geom, res)\n", - " \n", + "\n", " summ_stats_h3_r5 = gen_zonal_stats(\n", " h3_features,\n", " raster_path,\n", @@ -497,33 +494,40 @@ " prefix=prefix,\n", " percent_cover_weighting=True,\n", " geojson_out=True,\n", - " all_touched=True\n", - " )\n", - " \n", + " all_touched=True,\n", + " )\n", + "\n", " _array = []\n", " for feature in summ_stats_h3_r5:\n", - " if feature['properties'][f'{prefix}{stats}'] !=0:\n", + " if feature[\"properties\"][f\"{prefix}{stats}\"] != 0:\n", " element = {\n", - " 
'sumStats':feature['properties'][f'{prefix}{stats}'],\n", - " 'hexId':feature['properties']['hexid'], \n", + " \"sumStats\": feature[\"properties\"][f\"{prefix}{stats}\"],\n", + " \"hexId\": feature[\"properties\"][\"hexid\"],\n", " }\n", " _array.append(element)\n", - " return _array \n", + " return _array\n", "\n", "\n", "def make_plot(title, hist, edges, x, pdf, cdf):\n", - " p = figure(title=title, tools='', background_fill_color=\"#fafafa\")\n", - " p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],\n", - " fill_color=\"navy\", line_color=\"white\", alpha=0.5)\n", + " p = figure(title=title, tools=\"\", background_fill_color=\"#fafafa\")\n", + " p.quad(\n", + " top=hist,\n", + " bottom=0,\n", + " left=edges[:-1],\n", + " right=edges[1:],\n", + " fill_color=\"navy\",\n", + " line_color=\"white\",\n", + " alpha=0.5,\n", + " )\n", " p.line(x, pdf, line_color=\"#ff8888\", line_width=4, alpha=0.7, legend_label=\"PDF\")\n", " p.line(x, cdf, line_color=\"orange\", line_width=2, alpha=0.7, legend_label=\"CDF\")\n", "\n", " p.y_range.start = 0\n", " p.legend.location = \"center_right\"\n", " p.legend.background_fill_color = \"#fefefe\"\n", - " p.xaxis.axis_label = 'x'\n", - " p.yaxis.axis_label = 'Pr(x)'\n", - " p.grid.grid_line_color=\"white\"\n", + " p.xaxis.axis_label = \"x\"\n", + " p.yaxis.axis_label = \"Pr(x)\"\n", + " p.grid.grid_line_color = \"white\"\n", " return p" ] }, @@ -609,8 +613,8 @@ } ], "source": [ - "#import indonesia clip test shape\n", - "gdf_ind = gpd.read_file('../../datasets/raw/input_data_test/indonesia_test_shape_clip.shp')\n", + "# import indonesia clip test shape\n", + "gdf_ind = gpd.read_file(\"../../datasets/raw/input_data_test/indonesia_test_shape_clip.shp\")\n", "gdf_ind" ] }, @@ -642,8 +646,8 @@ } ], "source": [ - "#set geom to epsg 4326 for summ stats\n", - "gdf_ind = gdf_ind.to_crs('EPSG:4326')\n", + "# set geom to epsg 4326 for summ stats\n", + "gdf_ind = gdf_ind.to_crs(\"EPSG:4326\")\n", "gdf_ind.crs" ] }, @@ -668,8 +672,8 @@ } ], "source": [ - "#get geometry to parse\n", - "geom = gdf_ind.iloc[0]['geometry']\n", + "# get geometry to parse\n", + "geom = gdf_ind.iloc[0][\"geometry\"]\n", "geom" ] }, @@ -725,8 +729,8 @@ } ], "source": [ - "#import world dataset\n", - "gdf_world = gpd.read_file('../../datasets/raw/input_data_test/world_shape_simpl.shp')\n", + "# import world dataset\n", + "gdf_world = gpd.read_file(\"../../datasets/raw/input_data_test/world_shape_simpl.shp\")\n", "gdf_world" ] }, @@ -758,7 +762,7 @@ } ], "source": [ - "#check crs of world geom\n", + "# check crs of world geom\n", "gdf_world.crs" ] }, @@ -783,7 +787,7 @@ } ], "source": [ - "geom_world = gdf_world.iloc[0]['geometry']\n", + "geom_world = gdf_world.iloc[0][\"geometry\"]\n", "geom_world" ] }, @@ -803,12 +807,16 @@ "metadata": {}, "outputs": [], "source": [ - "#rasters for testing calculations with different resolutions - need to be in epsg4326\n", - "raster_path_30m = '../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018_new_extent_4326.tif'\n", - "raster_path_10km = '../../datasets/processed/processed_data/risk_map/water_risk_cotton_4326_2000_v2.tif'\n", + "# rasters for testing calculations with different resolutions - need to be in epsg4326\n", + "raster_path_30m = \"../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018_new_extent_4326.tif\"\n", + "raster_path_10km = (\n", + " \"../../datasets/processed/processed_data/risk_map/water_risk_cotton_4326_2000_v2.tif\"\n", + ")\n", "\n", "\n", - "raster_path_10km_3857 = 
'../../datasets/processed/processed_data/risk_map/water_risk_cotton_3857_2000_v2.tif'" + "raster_path_10km_3857 = (\n", + " \"../../datasets/processed/processed_data/risk_map/water_risk_cotton_3857_2000_v2.tif\"\n", + ")" ] }, { @@ -826,8 +834,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_5_res = get_h3_array(geom, raster_path_10km, 5, 'median', 'wr_cotton_')" + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_5_res = get_h3_array(geom, raster_path_10km, 5, \"median\", \"wr_cotton_\")" ] }, { @@ -837,8 +845,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export json\n", - "with open('./water_risk_res5.json', 'w') as f:\n", + "# export json\n", + "with open(\"./water_risk_res5.json\", \"w\") as f:\n", " json.dump(array_5_res, f)" ] }, @@ -857,8 +865,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_6_res = get_h3_array(geom, raster_path_10km, 6, 'median', 'wr_cotton_')" + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_6_res = get_h3_array(geom, raster_path_10km, 6, \"median\", \"wr_cotton_\")" ] }, { @@ -868,8 +876,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export json\n", - "with open('./water_risk_res6.json', 'w') as f:\n", + "# export json\n", + "with open(\"./water_risk_res6.json\", \"w\") as f:\n", " json.dump(array_6_res, f)" ] }, @@ -888,8 +896,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_6_res_mean = get_h3_array(geom, raster_path_10km, 6, 'mean', 'wr_cotton_')" + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_6_res_mean = get_h3_array(geom, raster_path_10km, 6, \"mean\", \"wr_cotton_\")" ] }, { @@ -899,8 +907,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export json\n", - "with open('./water_risk_res6_mean.json', 'w') as f:\n", + "# export json\n", + "with open(\"./water_risk_res6_mean.json\", \"w\") as f:\n", " json.dump(array_6_res_mean, f)" ] }, @@ -919,8 +927,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_6_res_sum = get_h3_array(geom, raster_path_10km, 6, 'sum', 'wr_cotton_')" + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_6_res_sum = get_h3_array(geom, raster_path_10km, 6, \"sum\", \"wr_cotton_\")" ] }, { @@ -930,8 +938,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export json\n", - "with open('./water_risk_res6_sum.json', 'w') as f:\n", + "# export json\n", + "with open(\"./water_risk_res6_sum.json\", \"w\") as f:\n", " json.dump(array_6_res_sum, f)" ] }, @@ -950,8 +958,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_6_res = get_h3_array(geom, raster_path_30m, 6, 'sum', 'wr_cotton_')" + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_6_res = get_h3_array(geom, raster_path_30m, 6, \"sum\", \"wr_cotton_\")" ] }, { @@ -961,8 +969,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export json\n", - "with open('./df_cotton_res6.json', 'w') as f:\n", + "# export json\n", + "with open(\"./df_cotton_res6.json\", \"w\") as f:\n", " json.dump(array_6_res, f)" ] }, @@ -981,8 +989,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_8_res = get_h3_array(geom, 
raster_path_30m, 8, 'sum', 'wr_cotton_')" + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_8_res = get_h3_array(geom, raster_path_30m, 8, \"sum\", \"wr_cotton_\")" ] }, { @@ -992,8 +1000,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export json\n", - "with open('./df_cotton_res8.json', 'w') as f:\n", + "# export json\n", + "with open(\"./df_cotton_res8.json\", \"w\") as f:\n", " json.dump(array_8_res, f)" ] }, @@ -1012,8 +1020,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_9_res = get_h3_array(geom, raster_path_30m, 9, 'sum', 'wr_cotton_')" + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_9_res = get_h3_array(geom, raster_path_30m, 9, \"sum\", \"wr_cotton_\")" ] }, { @@ -1023,8 +1031,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export json\n", - "with open('./df_cotton_res9.json', 'w') as f:\n", + "# export json\n", + "with open(\"./df_cotton_res9.json\", \"w\") as f:\n", " json.dump(array_9_res, f)" ] }, @@ -1043,9 +1051,9 @@ "metadata": {}, "outputs": [], "source": [ - "#explore the res 9 but with weighted mean - difference with weighted sum\n", - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_9_res = get_h3_array(geom, raster_path_30m, 9, 'mean', 'wr_cotton_')" + "# explore the res 9 but with weighted mean - difference with weighted sum\n", + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_9_res = get_h3_array(geom, raster_path_30m, 9, \"mean\", \"wr_cotton_\")" ] }, { @@ -1055,8 +1063,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export json\n", - "with open('./df_cotton_res9_mean.json', 'w') as f:\n", + "# export json\n", + "with open(\"./df_cotton_res9_mean.json\", \"w\") as f:\n", " json.dump(array_9_res, f)" ] }, @@ -1075,8 +1083,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_6_res_world = get_h3_array(geom_world, raster_path_10km, 6, 'mean', 'wr_cotton_')" + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_6_res_world = get_h3_array(geom_world, raster_path_10km, 6, \"mean\", \"wr_cotton_\")" ] }, { @@ -1086,8 +1094,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export json\n", - "with open('./water_risk_cotton_res6_mean_global.json', 'w') as f:\n", + "# export json\n", + "with open(\"./water_risk_cotton_res6_mean_global.json\", \"w\") as f:\n", " json.dump(array_6_res_world_3857, f)" ] }, @@ -1098,7 +1106,7 @@ "metadata": {}, "outputs": [], "source": [ - "array_6_res_world_clean = [el for el in array_6_res_world if el['sumStats'] != None]\n" + "array_6_res_world_clean = [el for el in array_6_res_world if el[\"sumStats\"] is not None]" ] }, { @@ -1108,7 +1116,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open('./water_risk_cotton_res6_mean_global_clean.json', 'w') as f:\n", + "with open(\"./water_risk_cotton_res6_mean_global_clean.json\", \"w\") as f:\n", " json.dump(array_6_res_world_clean, f)" ] }, @@ -1127,8 +1135,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_1_res_world = get_h3_array(geom_world, raster_path_10km, 1, 'sum', 'wr_cotton_')" + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_1_res_world = get_h3_array(geom_world, raster_path_10km, 1, \"sum\", \"wr_cotton_\")" ] }, { @@ -1138,8 +1146,8 @@ "metadata": {}, "outputs": [], "source": [ - 
"#export json\n", - "with open('./water_risk_cotton_res1_mean_global.json', 'w') as f:\n", + "# export json\n", + "with open(\"./water_risk_cotton_res1_mean_global.json\", \"w\") as f:\n", " json.dump(array_1_res_world, f)" ] }, @@ -1158,8 +1166,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_3_res_world = get_h3_array(geom_world, raster_path_10km, 3, 'sum', 'wr_cotton_')" + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_3_res_world = get_h3_array(geom_world, raster_path_10km, 3, \"sum\", \"wr_cotton_\")" ] }, { @@ -1169,8 +1177,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export json\n", - "with open('./water_risk_cotton_res3_mean_global.json', 'w') as f:\n", + "# export json\n", + "with open(\"./water_risk_cotton_res3_mean_global.json\", \"w\") as f:\n", " json.dump(array_3_res_world, f)" ] }, @@ -1189,8 +1197,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_5_res_world = get_h3_array(geom_world, raster_path_10km, 5, 'sum', 'wr_cotton_')" + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_5_res_world = get_h3_array(geom_world, raster_path_10km, 5, \"sum\", \"wr_cotton_\")" ] }, { @@ -1200,8 +1208,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export json\n", - "with open('./water_risk_cotton_res5_sum_global.json', 'w') as f:\n", + "# export json\n", + "with open(\"./water_risk_cotton_res5_sum_global.json\", \"w\") as f:\n", " json.dump(array_5_res_world, f)" ] }, @@ -1212,9 +1220,9 @@ "metadata": {}, "outputs": [], "source": [ - "#clean none from res 5\n", + "# clean none from res 5\n", "\n", - "with open('./water_risk_cotton_res5_mean_global.json', 'r') as f:\n", + "with open(\"./water_risk_cotton_res5_mean_global.json\", \"r\") as f:\n", " array_5_res_world = json.load(f)" ] }, @@ -1225,7 +1233,7 @@ "metadata": {}, "outputs": [], "source": [ - "array_5_res_world = [el for el in array_5_res_world if el['sumStats'] != None]" + "array_5_res_world = [el for el in array_5_res_world if el[\"sumStats\"] is not None]" ] }, { @@ -1243,8 +1251,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get h3 array for resolution 5 and raster of 10km resolution\n", - "array_6_res_world = get_h3_array(geom_world, raster_path_10km, 6, 'sum', 'wr_cotton_')" + "# get h3 array for resolution 5 and raster of 10km resolution\n", + "array_6_res_world = get_h3_array(geom_world, raster_path_10km, 6, \"sum\", \"wr_cotton_\")" ] }, { @@ -1254,8 +1262,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export json\n", - "with open('./water_risk_cotton_res6_sum_global.json', 'w') as f:\n", + "# export json\n", + "with open(\"./water_risk_cotton_res6_sum_global.json\", \"w\") as f:\n", " json.dump(array_6_res_world, f)" ] }, @@ -1266,7 +1274,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open('./water_risk_cotton_res6_sum_global.json', 'r') as f:\n", + "with open(\"./water_risk_cotton_res6_sum_global.json\", \"r\") as f:\n", " array_6_res_world = json.load(f)" ] }, @@ -1277,7 +1285,7 @@ "metadata": {}, "outputs": [], "source": [ - "array_6_res_world = [el for el in array_6_res_world if el['sumStats'] != None]\n" + "array_6_res_world = [el for el in array_6_res_world if el[\"sumStats\"] is not None]" ] }, { @@ -1307,10 +1315,7 @@ "source": [ "_array = []\n", "for feature in generator:\n", - " element = {\n", - " 'hexId':feature['properties']['hexid'], \n", - " 
'geometry':feature['geometry']\n", - " }\n", + " element = {\"hexId\": feature[\"properties\"][\"hexid\"], \"geometry\": feature[\"geometry\"]}\n", " _array.append(element)" ] }, @@ -1401,12 +1406,11 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "geometries = []\n", - "for i,row in gdf.iterrows():\n", - " geom = shape(row['geometry'])\n", + "for i, row in gdf.iterrows():\n", + " geom = shape(row[\"geometry\"])\n", " geometries.append(geom)\n", - "gdf['geometry']=geometries" + "gdf[\"geometry\"] = geometries" ] }, { @@ -1416,7 +1420,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf = gdf.set_geometry('geometry')\n", + "gdf = gdf.set_geometry(\"geometry\")\n", "gdf.crs" ] }, @@ -1496,8 +1500,8 @@ } ], "source": [ - "gdf = gdf.set_crs('EPSG:4326')\n", - "gdf = gdf.to_crs('EPSG:3857')\n", + "gdf = gdf.set_crs(\"EPSG:4326\")\n", + "gdf = gdf.to_crs(\"EPSG:3857\")\n", "gdf.head()" ] }, @@ -1508,8 +1512,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf.to_file('./world_geom_3857.json',\n", - " driver='GeoJSON')" + "gdf.to_file(\"./world_geom_3857.json\", driver=\"GeoJSON\")" ] }, { @@ -1527,7 +1530,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open('./water_risk_cotton_res6_mean_global_clean.json', 'r') as f:\n", + "with open(\"./water_risk_cotton_res6_mean_global_clean.json\", \"r\") as f:\n", " array_6_res_world_clean = json.load(f)" ] }, @@ -1619,8 +1622,8 @@ "outputs": [], "source": [ "geometries = []\n", - "for i,row in gdf.iterrows():\n", - " hexid = row['hexId']\n", + "for i, row in gdf.iterrows():\n", + " hexid = row[\"hexId\"]\n", " coords = h3.h3_set_to_multi_polygon([hexid], geo_json=True)\n", " geom_feature = {\"type\": \"Polygon\", \"coordinates\": coords[0]}\n", " geom = shape(geom_feature)\n", @@ -1716,8 +1719,8 @@ } ], "source": [ - "#append geometry in epsg4326\n", - "gdf['geometry']= geometries\n", + "# append geometry in epsg4326\n", + "gdf[\"geometry\"] = geometries\n", "gdf.head()" ] }, @@ -1728,7 +1731,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf = gdf.set_geometry('geometry')\n", + "gdf = gdf.set_geometry(\"geometry\")\n", "gdf.crs" ] }, @@ -1763,9 +1766,9 @@ } ], "source": [ - "#set crs to epsg4326 and reproject to epsg3857\n", - "gdf = gdf.set_crs('EPSG:4326')\n", - "gdf = gdf.to_crs('EPSG:3857')\n", + "# set crs to epsg4326 and reproject to epsg3857\n", + "gdf = gdf.set_crs(\"EPSG:4326\")\n", + "gdf = gdf.to_crs(\"EPSG:3857\")\n", "gdf.crs" ] }, @@ -1776,9 +1779,8 @@ "metadata": {}, "outputs": [], "source": [ - "#save as json\n", - "gdf.to_file('./water_risk_cotton_res6_mean_global_clean_3857.json',\n", - " driver='GeoJSON')" + "# save as json\n", + "gdf.to_file(\"./water_risk_cotton_res6_mean_global_clean_3857.json\", driver=\"GeoJSON\")" ] }, { @@ -1796,7 +1798,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open('./water_risk_cotton_res6_mean_global_clean.json', 'r') as f:\n", + "with open(\"./water_risk_cotton_res6_mean_global_clean.json\", \"r\") as f:\n", " array_6_res_world_clean = json.load(f)" ] }, @@ -1888,10 +1890,10 @@ "outputs": [], "source": [ "geometries = []\n", - "for i,row in gdf.iterrows():\n", - " hexid = row['hexId']\n", + "for i, row in gdf.iterrows():\n", + " hexid = row[\"hexId\"]\n", " centroid = h3.h3_to_geo(hexid)\n", - " point = Point(centroid[1],centroid[0])\n", + " point = Point(centroid[1], centroid[0])\n", " geometries.append(point)" ] }, @@ -1977,7 +1979,7 @@ } ], "source": [ - "gdf['geometry'] = geometries\n", + "gdf[\"geometry\"] = geometries\n", "gdf.head()" ] }, @@ -1988,8 +1990,8 @@ 
"metadata": {}, "outputs": [], "source": [ - "gdf = gdf.set_geometry('geometry')\n", - "gdf = gdf.set_crs('EPSG:4326')" + "gdf = gdf.set_geometry(\"geometry\")\n", + "gdf = gdf.set_crs(\"EPSG:4326\")" ] }, { @@ -1999,8 +2001,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf.to_file('./water_risk_cotton_res6_mean_global_clean_point.json',\n", - " driver='GeoJSON')" + "gdf.to_file(\"./water_risk_cotton_res6_mean_global_clean_point.json\", driver=\"GeoJSON\")" ] }, { @@ -2152,7 +2153,7 @@ ], "source": [ "## import user data\n", - "user_data = gpd.read_file('../../datasets/processed/user_data/located_lg_data_polygon_v2.shp')\n", + "user_data = gpd.read_file(\"../../datasets/processed/user_data/located_lg_data_polygon_v2.shp\")\n", "user_data.head()" ] }, @@ -2184,7 +2185,7 @@ } ], "source": [ - "user_data[user_data['Material']=='Cotton'].iloc[0]" + "user_data[user_data[\"Material\"] == \"Cotton\"].iloc[0]" ] }, { @@ -2194,9 +2195,9 @@ "metadata": {}, "outputs": [], "source": [ - "#check with one location\n", - "geom = user_data[user_data['Material']=='Cotton'].iloc[0]['geometry']\n", - "generator = generate_h3_features(geom, 6)\n" + "# check with one location\n", + "geom = user_data[user_data[\"Material\"] == \"Cotton\"].iloc[0][\"geometry\"]\n", + "generator = generate_h3_features(geom, 6)" ] }, { @@ -2206,9 +2207,10 @@ "metadata": {}, "outputs": [], "source": [ - "test_china = [{'volume':2400,\n", - " 'hexid':feature['properties']['hexid'],\n", - " 'geometry':feature['geometry']} for feature in generator]" + "test_china = [\n", + " {\"volume\": 2400, \"hexid\": feature[\"properties\"][\"hexid\"], \"geometry\": feature[\"geometry\"]}\n", + " for feature in generator\n", + "]" ] }, { @@ -2238,15 +2240,15 @@ "outputs": [], "source": [ "harvest_area_fraction_raster = {\n", - " 'Rubber': '../../datasets/raw/crop_data/rubber/rubber_HarvestedAreaFraction.tif',\n", - " 'Cotton': '../../datasets/raw/crop_data/cotton/cotton_HarvestedAreaFraction.tif',\n", - " 'Leather': '../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/asture2000_5m_ext_v2.tif'\n", + " \"Rubber\": \"../../datasets/raw/crop_data/rubber/rubber_HarvestedAreaFraction.tif\",\n", + " \"Cotton\": \"../../datasets/raw/crop_data/cotton/cotton_HarvestedAreaFraction.tif\",\n", + " \"Leather\": \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/asture2000_5m_ext_v2.tif\",\n", "}\n", "\n", "yield_raster = {\n", - " 'Rubber': '../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare.tif',\n", - " 'Cotton': '../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare.tif',\n", - " 'Leather': '../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v5_NoZeros.tif'\n", + " \"Rubber\": \"../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare.tif\",\n", + " \"Cotton\": \"../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare.tif\",\n", + " \"Leather\": \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v5_NoZeros.tif\",\n", "}" ] }, @@ -2273,11 +2275,11 @@ } ], "source": [ - "#zonal stats for harvest area\n", + "# zonal stats for harvest area\n", "start_time = time.time()\n", - "material = user_data.iloc[0]['Material']\n", + "material = user_data.iloc[0][\"Material\"]\n", "raster_path_ha = harvest_area_fraction_raster[material]\n", - "_array_ha = get_h3_array(geom, raster_path_ha, 6, 'mean', 'ha')\n", + "_array_ha = get_h3_array(geom, raster_path_ha, 6, \"mean\", \"ha\")\n", "print(\"--- %s 
seconds ---\" % (time.time() - start_time))" ] }, @@ -2288,11 +2290,11 @@ "metadata": {}, "outputs": [], "source": [ - "#parse harvest area fraction to h3 index\n", + "# parse harvest area fraction to h3 index\n", "for el in test_china:\n", - " harvest_area_list = [ha['sumStats'] for ha in _array_ha if ha['hexId'] == el['hexid']]\n", - " harvest_area = harvest_area_list[0] if len(harvest_area_list)>0 else 0\n", - " el['ha']=harvest_area" + " harvest_area_list = [ha[\"sumStats\"] for ha in _array_ha if ha[\"hexId\"] == el[\"hexid\"]]\n", + " harvest_area = harvest_area_list[0] if len(harvest_area_list) > 0 else 0\n", + " el[\"ha\"] = harvest_area" ] }, { @@ -2303,8 +2305,10 @@ "outputs": [], "source": [ "# export unique user data\n", - "with open('../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha.json', 'w') as f:\n", - " json.dump(test_china,f)" + "with open(\n", + " \"../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha.json\", \"w\"\n", + ") as f:\n", + " json.dump(test_china, f)" ] }, { @@ -2314,7 +2318,9 @@ "metadata": {}, "outputs": [], "source": [ - "with open('../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha.json','r') as f:\n", + "with open(\n", + " \"../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha.json\", \"r\"\n", + ") as f:\n", " test_china = json.load(f)" ] }, @@ -2325,15 +2331,15 @@ "metadata": {}, "outputs": [], "source": [ - "#get sum of haf to calculate probability area distribution\n", + "# get sum of haf to calculate probability area distribution\n", "\n", - "total_ha = sum([el['ha'] for el in test_china])\n", + "total_ha = sum([el[\"ha\"] for el in test_china])\n", "\n", - "#calculate probability area\n", + "# calculate probability area\n", "\n", "for el in test_china:\n", - " p_dis = float((el['ha']*el['volume'])/total_ha) \n", - " el['p_dis']=p_dis" + " p_dis = float((el[\"ha\"] * el[\"volume\"]) / total_ha)\n", + " el[\"p_dis\"] = p_dis" ] }, { @@ -2343,8 +2349,8 @@ "metadata": {}, "outputs": [], "source": [ - "#remove 0\n", - "test_china = [el for el in test_china if el['p_dis'] !=0]" + "# remove 0\n", + "test_china = [el for el in test_china if el[\"p_dis\"] != 0]" ] }, { @@ -2355,8 +2361,11 @@ "outputs": [], "source": [ "# export unique user data\n", - "with open('../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha_pdis.json', 'w') as f:\n", - " json.dump(test_china,f)\n" + "with open(\n", + " \"../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha_pdis.json\",\n", + " \"w\",\n", + ") as f:\n", + " json.dump(test_china, f)" ] }, { @@ -2367,8 +2376,11 @@ "outputs": [], "source": [ "# export unique user data\n", - "with open('../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha_pdis.json', 'r') as f:\n", - " test_china = json.load(f)\n" + "with open(\n", + " \"../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha_pdis.json\",\n", + " \"r\",\n", + ") as f:\n", + " test_china = json.load(f)" ] }, { @@ -2455,8 +2467,10 @@ } ], "source": [ - "#risk map in h3 - water risk cotton\n", - "cotton_water_risk = pd.read_json('../../datasets/processed/water_indicators/water_risk_cotton_res6_mean_global_clean.json')\n", + "# risk map in h3 - water risk cotton\n", + "cotton_water_risk = pd.read_json(\n", + " \"../../datasets/processed/water_indicators/water_risk_cotton_res6_mean_global_clean.json\"\n", + ")\n", "cotton_water_risk.head()" ] }, @@ 
-2680,8 +2694,10 @@ } ], "source": [ - "#calculation of metric\n", - "merge_df = pd.merge(user_data_china,cotton_water_risk, how= 'inner', left_on='hexid', right_on='hexId')\n", + "# calculation of metric\n", + "merge_df = pd.merge(\n", + " user_data_china, cotton_water_risk, how=\"inner\", left_on=\"hexid\", right_on=\"hexId\"\n", + ")\n", "merge_df.head()" ] }, @@ -2692,16 +2708,16 @@ "metadata": {}, "outputs": [], "source": [ - "#save as json\n", + "# save as json\n", "china_impact = []\n", - "for i,row in merge_df.iterrows():\n", + "for i, row in merge_df.iterrows():\n", " element = {\n", - " 'volume':row['volume'],\n", - " 'hexid':row['hexid'],\n", - " 'geometry':row['geometry'],\n", - " 'impact':float(row['p_dis']*row['sumStats'])\n", + " \"volume\": row[\"volume\"],\n", + " \"hexid\": row[\"hexid\"],\n", + " \"geometry\": row[\"geometry\"],\n", + " \"impact\": float(row[\"p_dis\"] * row[\"sumStats\"]),\n", " }\n", - " china_impact.append(element)\n" + " china_impact.append(element)" ] }, { @@ -2711,8 +2727,8 @@ "metadata": {}, "outputs": [], "source": [ - "with open('../../datasets/processed/water_indicators/water impact_china_h3.json','w') as f:\n", - " json.dump(china_impact,f)" + "with open(\"../../datasets/processed/water_indicators/water impact_china_h3.json\", \"w\") as f:\n", + " json.dump(china_impact, f)" ] }, { @@ -2722,7 +2738,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open('../../datasets/processed/water_indicators/water impact_china_h3.json', 'r') as f:\n", + "with open(\"../../datasets/processed/water_indicators/water impact_china_h3.json\", \"r\") as f:\n", " china_test = json.load(f)" ] }, @@ -2920,7 +2936,7 @@ } ], "source": [ - "gdf['geometry'] = [shape(row['geometry']) for i,row in gdf.iterrows()]\n", + "gdf[\"geometry\"] = [shape(row[\"geometry\"]) for i, row in gdf.iterrows()]\n", "gdf.head()" ] }, @@ -2931,9 +2947,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf.to_file(\n", - " './china_test.shp',\n", - " driver='ESRI Shapefile')" + "gdf.to_file(\"./china_test.shp\", driver=\"ESRI Shapefile\")" ] }, { @@ -3003,11 +3017,11 @@ "mu, sigma = 0, 0.5\n", "\n", "measured = np.random.normal(mu, sigma, 1000)\n", - "hist, edges = np.histogram(gdf['impact'], density=True, bins=50)\n", + "hist, edges = np.histogram(gdf[\"impact\"], density=True, bins=50)\n", "\n", "x = np.linspace(-2, 2, 1000)\n", - "pdf = 1/(sigma * np.sqrt(2*np.pi)) * np.exp(-(x-mu)**2 / (2*sigma**2))\n", - "cdf = (1+scipy.special.erf((x-mu)/np.sqrt(2*sigma**2)))/2\n", + "pdf = 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-((x - mu) ** 2) / (2 * sigma**2))\n", + "cdf = (1 + scipy.special.erf((x - mu) / np.sqrt(2 * sigma**2))) / 2\n", "\n", "p1 = make_plot(\"Normal Distribution (μ=0, σ=0.5)\", hist, edges, x, pdf, cdf)\n", "\n", @@ -3016,11 +3030,11 @@ "mu, sigma = 0, 0.5\n", "\n", "measured = np.random.lognormal(mu, sigma, 1000)\n", - "hist, edges = np.histogram(gdf['impact'], density=True, bins=50)\n", + "hist, edges = np.histogram(gdf[\"impact\"], density=True, bins=50)\n", "\n", "x = np.linspace(0.0001, 8.0, 1000)\n", - "pdf = 1/(x* sigma * np.sqrt(2*np.pi)) * np.exp(-(np.log(x)-mu)**2 / (2*sigma**2))\n", - "cdf = (1+scipy.special.erf((np.log(x)-mu)/(np.sqrt(2)*sigma)))/2\n", + "pdf = 1 / (x * sigma * np.sqrt(2 * np.pi)) * np.exp(-((np.log(x) - mu) ** 2) / (2 * sigma**2))\n", + "cdf = (1 + scipy.special.erf((np.log(x) - mu) / (np.sqrt(2) * sigma))) / 2\n", "\n", "p2 = make_plot(\"Log Normal Distribution (μ=0, σ=0.5)\", hist, edges, x, pdf, cdf)\n", "\n", @@ -3029,29 +3043,29 @@ "k, 
theta = 7.5, 1.0\n", "\n", "measured = np.random.gamma(k, theta, 1000)\n", - "hist, edges = np.histogram(gdf['impact'], density=True, bins=50)\n", + "hist, edges = np.histogram(gdf[\"impact\"], density=True, bins=50)\n", "\n", "x = np.linspace(0.0001, 20.0, 1000)\n", - "pdf = x**(k-1) * np.exp(-x/theta) / (theta**k * scipy.special.gamma(k))\n", - "cdf = scipy.special.gammainc(k, x/theta)\n", + "pdf = x ** (k - 1) * np.exp(-x / theta) / (theta**k * scipy.special.gamma(k))\n", + "cdf = scipy.special.gammainc(k, x / theta)\n", "\n", "p3 = make_plot(\"Gamma Distribution (k=7.5, θ=1)\", hist, edges, x, pdf, cdf)\n", "\n", "# Weibull Distribution\n", "\n", "lam, k = 1, 1.25\n", - "measured = lam*(-np.log(np.random.uniform(0, 1, 1000)))**(1/k)\n", - "hist, edges = np.histogram(gdf['impact'], density=True, bins=50)\n", + "measured = lam * (-np.log(np.random.uniform(0, 1, 1000))) ** (1 / k)\n", + "hist, edges = np.histogram(gdf[\"impact\"], density=True, bins=50)\n", "\n", "x = np.linspace(0.0001, 8, 1000)\n", - "pdf = (k/lam)*(x/lam)**(k-1) * np.exp(-(x/lam)**k)\n", - "cdf = 1 - np.exp(-(x/lam)**k)\n", + "pdf = (k / lam) * (x / lam) ** (k - 1) * np.exp(-((x / lam) ** k))\n", + "cdf = 1 - np.exp(-((x / lam) ** k))\n", "\n", "p4 = make_plot(\"Weibull Distribution (λ=1, k=1.25)\", hist, edges, x, pdf, cdf)\n", "\n", - "output_file('histogram.html', title=\"histogram.py example\")\n", + "output_file(\"histogram.html\", title=\"histogram.py example\")\n", "\n", - "show(gridplot([p1,p2,p3,p4], ncols=2, plot_width=400, plot_height=400, toolbar_location=None))" + "show(gridplot([p1, p2, p3, p4], ncols=2, plot_width=400, plot_height=400, toolbar_location=None))" ] }, { @@ -3070,8 +3084,9 @@ ], "source": [ "# using this h3 methodology - the total impact would be the sum of the distributed impacts\n", - "print(f\"The unsustainable water use impact for buying 2400 tonnes of cotton in China would be {sum(gdf['impact'])} m3 / year\")\n", - "\n" + "print(\n", + " f\"The unsustainable water use impact for buying 2400 tonnes of cotton in China would be {sum(gdf['impact'])} m3 / year\"\n", + ")" ] }, { @@ -3236,9 +3251,9 @@ } ], "source": [ - "# download projection over time - \n", + "# download projection over time -\n", "\n", - "ha_00_19 = pd.read_csv('../../datasets/raw/crop_data/FAOSTAT_ha_2000_2019.csv')\n", + "ha_00_19 = pd.read_csv(\"../../datasets/raw/crop_data/FAOSTAT_ha_2000_2019.csv\")\n", "ha_00_19.head()" ] }, @@ -3260,7 +3275,7 @@ } ], "source": [ - "ha_00_19[(ha_00_19['Year']==2000) & (ha_00_19['Area']=='Afghanistan')]['Value'][0]" + "ha_00_19[(ha_00_19[\"Year\"] == 2000) & (ha_00_19[\"Area\"] == \"Afghanistan\")][\"Value\"][0]" ] }, { @@ -3275,21 +3290,19 @@ "\n", "ha_byYear = []\n", "for country in unique_countries:\n", - " element = {\n", - " 'country': country\n", - " }\n", + " element = {\"country\": country}\n", " for year in unique_years:\n", " try:\n", - " value = float(list(ha_00_19[(ha_00_19['Area']==country) & (ha_00_19['Year']==year)]['Value'])[0])\n", + " value = float(\n", + " list(ha_00_19[(ha_00_19[\"Area\"] == country) & (ha_00_19[\"Year\"] == year)][\"Value\"])[\n", + " 0\n", + " ]\n", + " )\n", " except:\n", " value = 0\n", - " \n", - " \n", - " element[f'{year}'] = value\n", - " ha_byYear.append(element)\n", - " \n", - " \n", - " " + "\n", + " element[f\"{year}\"] = value\n", + " ha_byYear.append(element)" ] }, { @@ -3710,12 +3723,33 @@ } ], "source": [ - "pct_change_df = ha_df[['2000', '2001', '2002', '2003', '2004', '2005', '2006',\n", - " '2007', '2008', '2009', '2010', '2011', 
'2012', '2013', '2014', '2015',\n", - " '2016', '2017', '2018', '2019']].pct_change(axis=1)\n", + "pct_change_df = ha_df[\n", + " [\n", + " \"2000\",\n", + " \"2001\",\n", + " \"2002\",\n", + " \"2003\",\n", + " \"2004\",\n", + " \"2005\",\n", + " \"2006\",\n", + " \"2007\",\n", + " \"2008\",\n", + " \"2009\",\n", + " \"2010\",\n", + " \"2011\",\n", + " \"2012\",\n", + " \"2013\",\n", + " \"2014\",\n", + " \"2015\",\n", + " \"2016\",\n", + " \"2017\",\n", + " \"2018\",\n", + " \"2019\",\n", + " ]\n", + "].pct_change(axis=1)\n", "\n", - "#add countries\n", - "pct_change_df['country']=ha_df['country']\n", + "# add countries\n", + "pct_change_df[\"country\"] = ha_df[\"country\"]\n", "pct_change_df.head()" ] }, @@ -3726,7 +3760,7 @@ "metadata": {}, "outputs": [], "source": [ - "pct_change_df.to_csv('../../datasets/raw/crop_data/projection_factor_byCountry.csv')" + "pct_change_df.to_csv(\"../../datasets/raw/crop_data/projection_factor_byCountry.csv\")" ] }, { @@ -3840,9 +3874,9 @@ } ], "source": [ - "#filter by china mainland\n", - "pct_change_china = pct_change_df[pct_change_df['country']=='China, mainland']\n", - "pct_change_china['2000']=0\n", + "# filter by china mainland\n", + "pct_change_china = pct_change_df[pct_change_df[\"country\"] == \"China, mainland\"]\n", + "pct_change_china[\"2000\"] = 0\n", "pct_change_china" ] }, @@ -3864,7 +3898,7 @@ } ], "source": [ - "pct_change_china['2001'].iloc[0]" + "pct_change_china[\"2001\"].iloc[0]" ] }, { @@ -3961,40 +3995,51 @@ } ], "source": [ - "#to json\n", + "# to json\n", "pct_change_china_json = {}\n", "for el in pct_change_china.columns:\n", - " if el != 'country':\n", - " pct_change_china_json[el]=pct_change_china[el].iloc[0]\n", + " if el != \"country\":\n", + " pct_change_china_json[el] = pct_change_china[el].iloc[0]\n", "\n", - "#total_volume is 2400\n", + "# total_volume is 2400\n", "total_vol = 2400\n", - "#value is going to be (2000val + (factor*2000val))\n", - "#project average\n", - "average_risk = sum(merge_df['sumStats'])/len(merge_df['sumStats'])\n", - "pr_average_imp = [(average_risk + pct_change_china_json[f'{year}']*average_risk)*total_vol for year in range(2000,2020)]\n", + "# value is going to be (2000val + (factor*2000val))\n", + "# project average\n", + "average_risk = sum(merge_df[\"sumStats\"]) / len(merge_df[\"sumStats\"])\n", + "pr_average_imp = [\n", + " (average_risk + pct_change_china_json[f\"{year}\"] * average_risk) * total_vol\n", + " for year in range(2000, 2020)\n", + "]\n", "\n", - "#project min\n", - "min_risk = min(merge_df['sumStats'])\n", - "pr_min_imp = [(min_risk + pct_change_china_json[f'{year}']*min_risk)*total_vol for year in range(2000,2020)]\n", + "# project min\n", + "min_risk = min(merge_df[\"sumStats\"])\n", + "pr_min_imp = [\n", + " (min_risk + pct_change_china_json[f\"{year}\"] * min_risk) * total_vol\n", + " for year in range(2000, 2020)\n", + "]\n", "\n", - "#project max\n", - "max_risk = max(merge_df['sumStats'])\n", - "pr_max_imp = [(max_risk + pct_change_china_json[f'{year}']*max_risk)*total_vol for year in range(2000,2020)]\n", + "# project max\n", + "max_risk = max(merge_df[\"sumStats\"])\n", + "pr_max_imp = [\n", + " (max_risk + pct_change_china_json[f\"{year}\"] * max_risk) * total_vol\n", + " for year in range(2000, 2020)\n", + "]\n", "\n", "\n", - "#project sum\n", - "total_impact = sum(gdf['impact']) \n", - "pr_total_imp = [(total_impact + pct_change_china_json[f'{year}']*total_impact) for year in range(2000,2020)]\n", + "# project sum\n", + "total_impact = sum(gdf[\"impact\"])\n", + 
"pr_total_imp = [\n", + " (total_impact + pct_change_china_json[f\"{year}\"] * total_impact) for year in range(2000, 2020)\n", + "]\n", "\n", "\n", - "#generate dataframe\n", + "# generate dataframe\n", "df = pd.DataFrame()\n", - "df['year']=[year for year in range(2000,2020)]\n", - "df['average_imp']=pr_average_imp\n", - "df['min_imp']=pr_min_imp\n", - "df['max_imp']=pr_max_imp\n", - "df['total_imp']=pr_total_imp\n", + "df[\"year\"] = [year for year in range(2000, 2020)]\n", + "df[\"average_imp\"] = pr_average_imp\n", + "df[\"min_imp\"] = pr_min_imp\n", + "df[\"max_imp\"] = pr_max_imp\n", + "df[\"total_imp\"] = pr_total_imp\n", "df.head()" ] }, @@ -4070,19 +4115,26 @@ } ], "source": [ - "df['year'] = pd.to_datetime(df['year'], format='%Y')\n", + "df[\"year\"] = pd.to_datetime(df[\"year\"], format=\"%Y\")\n", "\n", "source = ColumnDataSource(df)\n", "\n", "p = figure(x_axis_type=\"datetime\")\n", "\n", - "p.line(x='year', y='average_imp', line_width=2, source=source, legend='Average impact')\n", - "p.line(x='year', y='min_imp', line_width=2, source=source, color=Spectral10[5], legend='Min impact')\n", - "p.line(x='year', y='max_imp', line_width=2, source=source, color=Spectral10[9], legend='Max impact')\n", - "p.line(x='year', y='total_imp', line_width=2, source=source, color=Spectral10[6], legend='Total impacts')\n", + "p.line(x=\"year\", y=\"average_imp\", line_width=2, source=source, legend=\"Average impact\")\n", + "p.line(x=\"year\", y=\"min_imp\", line_width=2, source=source, color=Spectral10[5], legend=\"Min impact\")\n", + "p.line(x=\"year\", y=\"max_imp\", line_width=2, source=source, color=Spectral10[9], legend=\"Max impact\")\n", + "p.line(\n", + " x=\"year\",\n", + " y=\"total_imp\",\n", + " line_width=2,\n", + " source=source,\n", + " color=Spectral10[6],\n", + " legend=\"Total impacts\",\n", + ")\n", "\n", - "p.title.text = 'Unsustainable water use impacts for Cotton in China'\n", - "p.yaxis.axis_label = 'm3 / year'\n", + "p.title.text = \"Unsustainable water use impacts for Cotton in China\"\n", + "p.yaxis.axis_label = \"m3 / year\"\n", "show(p)" ] }, diff --git a/data/notebooks/Lab/0_4_H3_data_processing.ipynb b/data/notebooks/Lab/0_4_H3_data_processing.ipynb index 299a8766b..15341d9c3 100644 --- a/data/notebooks/Lab/0_4_H3_data_processing.ipynb +++ b/data/notebooks/Lab/0_4_H3_data_processing.ipynb @@ -21,11 +21,12 @@ "outputs": [], "source": [ "# import libraries\n", - "import pandas as pd\n", + "import json\n", + "\n", "import geopandas as gpd\n", "import h3\n", - "import json\n", - "from shapely.geometry import shape, mapping" + "import pandas as pd\n", + "from shapely.geometry import shape" ] }, { @@ -34,11 +35,10 @@ "metadata": {}, "outputs": [], "source": [ + "import matplotlib.pyplot as plt\n", "import rasterio as rio\n", "import rasterio.plot\n", - "import matplotlib.pyplot as plt\n", - "from rasterstats import gen_zonal_stats, gen_point_query\n", - "from rasterstats import zonal_stats" + "from rasterstats import gen_point_query, gen_zonal_stats, zonal_stats" ] }, { @@ -50,15 +50,6 @@ "import time" ] }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "from processing.geolocating_data import GeolocateAddress" - ] - }, { "cell_type": "code", "execution_count": 5, @@ -190,9 +181,8 @@ ], "source": [ "# import user located data\n", - "user_data_polygon = gpd.read_file('../../datasets/processed/located_lg_data_polygon_v2.shp')\n", - "user_data_polygon.head()\n", - "\n" + "user_data_polygon = 
gpd.read_file(\"../../datasets/processed/located_lg_data_polygon_v2.shp\")\n", + "user_data_polygon.head()" ] }, { @@ -212,7 +202,7 @@ } ], "source": [ - "set(list(user_data_polygon['Material']))" + "set(list(user_data_polygon[\"Material\"]))" ] }, { @@ -242,7 +232,7 @@ } ], "source": [ - "user_data_polygon[user_data_polygon['Material']=='Cotton'].iloc[4]" + "user_data_polygon[user_data_polygon[\"Material\"] == \"Cotton\"].iloc[4]" ] }, { @@ -332,8 +322,8 @@ } ], "source": [ - "#lest select just one admin level 3 of india to reduce the computational time - this has been obtained from gadm\n", - "polygon_gdf = gpd.read_file('../../datasets/raw/Punjab_adm.shp')\n", + "# lest select just one admin level 3 of india to reduce the computational time - this has been obtained from gadm\n", + "polygon_gdf = gpd.read_file(\"../../datasets/raw/Punjab_adm.shp\")\n", "polygon_gdf" ] }, @@ -357,9 +347,9 @@ } ], "source": [ - "#select test location to perform calculations\n", + "# select test location to perform calculations\n", "\n", - "polygon = polygon_gdf.iloc[0]['geometry']\n", + "polygon = polygon_gdf.iloc[0][\"geometry\"]\n", "polygon" ] }, @@ -544,7 +534,7 @@ "source": [ "## import basins shapefile to test with front\n", "\n", - "basins = gpd.read_file('../../datasets/raw/basins_test_polygon.shp')\n", + "basins = gpd.read_file(\"../../datasets/raw/basins_test_polygon.shp\")\n", "basins.head()" ] }, @@ -568,7 +558,7 @@ } ], "source": [ - "basins['geometry'][0]" + "basins[\"geometry\"][0]" ] }, { @@ -597,15 +587,15 @@ } ], "source": [ - "#import blue water footprint cotton\n", - "with rio.open('../../datasets/raw/wfbl_mmyr_4326_cotton.tif') as src:\n", + "# import blue water footprint cotton\n", + "with rio.open(\"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((29.5,30.5))\n", - " ax.set_xlim((75,76))\n", - " rio.plot.show(dat, vmin=0, vmax=444, cmap='Blues', ax=ax, transform=src.transform)\n", - " polygon_gdf.plot(ax=ax, alpha=0.5, edgecolor='yellow')\n", - " ax.set_title('Cotton blue water footprint in India (dark blue: higher water footprint)')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((29.5, 30.5))\n", + " ax.set_xlim((75, 76))\n", + " rio.plot.show(dat, vmin=0, vmax=444, cmap=\"Blues\", ax=ax, transform=src.transform)\n", + " polygon_gdf.plot(ax=ax, alpha=0.5, edgecolor=\"yellow\")\n", + " ax.set_title(\"Cotton blue water footprint in India (dark blue: higher water footprint)\")" ] }, { @@ -626,39 +616,35 @@ "def generate_h3_df(geometry, res):\n", " \"\"\"\n", " Generate h3 for geometry\n", - " \n", + "\n", " Input\n", " ------\n", " geometry: shapely.polygon or shapely.multipolygon\n", - " \n", + "\n", " Output\n", " ------\n", " gdf with H3_hexes\n", " \"\"\"\n", " # Create an empty dataframe to write data into\n", - " h3_df = pd.DataFrame([],columns=['h3_id'])\n", - " if geometry.geom_type == 'MultiPolygon':\n", + " h3_df = pd.DataFrame([], columns=[\"h3_id\"])\n", + " if geometry.geom_type == \"MultiPolygon\":\n", " district_polygon = list(geometry)\n", " for polygon in district_polygon:\n", " poly_geojson = gpd.GeoSeries([polygon]).__geo_interface__\n", - " poly_geojson = poly_geojson['features'][0]['geometry'] \n", + " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n", " h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n", " for h3_hex in h3_hexes:\n", - " h3_df.loc[len(h3_df)]=[\n", - " h3_hex\n", - " ]\n", - " elif geometry.geom_type == 
'Polygon':\n", + " h3_df.loc[len(h3_df)] = [h3_hex]\n", + " elif geometry.geom_type == \"Polygon\":\n", " poly_geojson = gpd.GeoSeries(geometry).__geo_interface__\n", - " poly_geojson = poly_geojson['features'][0]['geometry']\n", + " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n", " h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n", " for h3_hex in h3_hexes:\n", - " h3_df.loc[len(h3_df)]=[\n", - " h3_hex\n", - " ]\n", + " h3_df.loc[len(h3_df)] = [h3_hex]\n", " else:\n", - " print('Shape is not a polygon or multypolygon.')\n", - " \n", - " return h3_df\n" + " print(\"Shape is not a polygon or multypolygon.\")\n", + "\n", + " return h3_df" ] }, { @@ -670,22 +656,22 @@ "def generate_h3_features(geometry, res):\n", " \"\"\"\n", " Generate h3 for geometry\n", - " \n", + "\n", " Input\n", " ------\n", " geometry: shapely.polygon or shapely.multipolygon\n", - " \n", + "\n", " Output\n", " ------\n", " gdf with H3_hexes\n", " \"\"\"\n", " # Create an empty dataframe to write data into\n", - " h3_df = pd.DataFrame([],columns=['h3_id'])\n", - " if geometry.geom_type == 'MultiPolygon':\n", + " pd.DataFrame([], columns=[\"h3_id\"])\n", + " if geometry.geom_type == \"MultiPolygon\":\n", " district_polygon = list(geometry)\n", " for polygon in district_polygon:\n", " poly_geojson = gpd.GeoSeries([polygon]).__geo_interface__\n", - " poly_geojson = poly_geojson['features'][0]['geometry'] \n", + " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n", " h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n", " for h3_hex in h3_hexes:\n", " coords = h3.h3_set_to_multi_polygon([h3_hex], geo_json=True)\n", @@ -694,9 +680,9 @@ " \"properties\": {\"hexid\": h3_hex},\n", " \"geometry\": {\"type\": \"Polygon\", \"coordinates\": coords[0]},\n", " }\n", - " elif geometry.geom_type == 'Polygon':\n", + " elif geometry.geom_type == \"Polygon\":\n", " poly_geojson = gpd.GeoSeries(geometry).__geo_interface__\n", - " poly_geojson = poly_geojson['features'][0]['geometry']\n", + " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n", " h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n", " for h3_hex in h3_hexes:\n", " coords = h3.h3_set_to_multi_polygon([h3_hex], geo_json=True)\n", @@ -706,10 +692,7 @@ " \"geometry\": {\"type\": \"Polygon\", \"coordinates\": coords[0]},\n", " }\n", " else:\n", - " print('Shape is not a polygon or multypolygon.')\n", - " \n", - " \n", - " " + " print(\"Shape is not a polygon or multypolygon.\")" ] }, { @@ -728,7 +711,7 @@ "source": [ "## time to process the entire malasya\n", "start_time = time.time()\n", - "h3_adm_df = generate_h3_df(user_data_polygon['geometry'][1], 8)\n", + "h3_adm_df = generate_h3_df(user_data_polygon[\"geometry\"][1], 8)\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -746,7 +729,7 @@ } ], "source": [ - "## time to process the test geometry - \n", + "## time to process the test geometry -\n", "start_time = time.time()\n", "h3_adm_df = generate_h3_df(polygon, 8)\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" @@ -838,9 +821,9 @@ } ], "source": [ - "#time spend in generating the features in h3 for Malasya\n", + "# time spend in generating the features in h3 for Malasya\n", "start_time = time.time()\n", - "h3_features = generate_h3_features(test_location['geometry'], 8)\n", + "h3_features = generate_h3_features(test_location[\"geometry\"], 8)\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -858,7 +841,7 @@ } ], "source": [ - "#time spend in generating the features in h3 
for the test polygon\n", + "# time spend in generating the features in h3 for the test polygon\n", "start_time = time.time()\n", "h3_features = generate_h3_features(polygon, 8)\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" @@ -878,9 +861,9 @@ } ], "source": [ - "#time spend in generating the features in h3 for the basins test in resolution 1\n", + "# time spend in generating the features in h3 for the basins test in resolution 1\n", "start_time = time.time()\n", - "h3_features = [generate_h3_features(poly, 4) for poly in basins['geometry']]\n", + "h3_features = [generate_h3_features(poly, 4) for poly in basins[\"geometry\"]]\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -898,9 +881,9 @@ } ], "source": [ - "#time spend in generating the features in h3 for the basins test in resolution 1\n", + "# time spend in generating the features in h3 for the basins test in resolution 1\n", "start_time = time.time()\n", - "h3_features_res5 = [generate_h3_features(poly, 5) for poly in basins['geometry']]\n", + "h3_features_res5 = [generate_h3_features(poly, 5) for poly in basins[\"geometry\"]]\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -918,9 +901,9 @@ } ], "source": [ - "#time spend in generating the features in h3 for the basins test in resolution 1\n", + "# time spend in generating the features in h3 for the basins test in resolution 1\n", "start_time = time.time()\n", - "h3_features_res7 = [generate_h3_features(poly, 7) for poly in basins['geometry']]\n", + "h3_features_res7 = [generate_h3_features(poly, 7) for poly in basins[\"geometry\"]]\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -945,21 +928,18 @@ } ], "source": [ - "\n", - "#summary statistics world main basins\n", + "# summary statistics world main basins\n", "start_time = time.time()\n", - "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n", + "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n", "# Outputs hexbin feautres with additional properties: \"population_sum\": \n", - "summ_stats_h3 = [gen_zonal_stats(\n", - " generator,\n", - " raster_path,\n", - " stats=\"max\",\n", - " prefix=\"m_\",\n", - " geojson_out=True,\n", - " all_touched=True\n", - " ) for generator in h3_features]\n", - " \n", - " \n", + "summ_stats_h3 = [\n", + " gen_zonal_stats(\n", + " generator, raster_path, stats=\"max\", prefix=\"m_\", geojson_out=True, all_touched=True\n", + " )\n", + " for generator in h3_features\n", + "]\n", + "\n", + "\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -977,21 +957,18 @@ } ], "source": [ - "\n", - "#summary statistics world main basins\n", + "# summary statistics world main basins\n", "start_time = time.time()\n", - "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n", + "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n", "# Outputs hexbin feautres with additional properties: \"population_sum\": \n", - "summ_stats_h3_res5 = [gen_zonal_stats(\n", - " generator,\n", - " raster_path,\n", - " stats=\"max\",\n", - " prefix=\"m_\",\n", - " geojson_out=True,\n", - " all_touched=True\n", - " ) for generator in h3_features_res5]\n", - " \n", - " \n", + "summ_stats_h3_res5 = [\n", + " gen_zonal_stats(\n", + " generator, raster_path, stats=\"max\", prefix=\"m_\", geojson_out=True, all_touched=True\n", + " )\n", + " for generator in h3_features_res5\n", + "]\n", + "\n", + "\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1009,21 
+986,18 @@ } ], "source": [ - "\n", - "#summary statistics world main basins\n", + "# summary statistics world main basins\n", "start_time = time.time()\n", - "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n", + "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n", "# Outputs hexbin feautres with additional properties: \"population_sum\": \n", - "summ_stats_h3_res7 = [gen_zonal_stats(\n", - " generator,\n", - " raster_path,\n", - " stats=\"max\",\n", - " prefix=\"m_\",\n", - " geojson_out=True,\n", - " all_touched=True\n", - " ) for generator in h3_features_res7]\n", - " \n", - " \n", + "summ_stats_h3_res7 = [\n", + " gen_zonal_stats(\n", + " generator, raster_path, stats=\"max\", prefix=\"m_\", geojson_out=True, all_touched=True\n", + " )\n", + " for generator in h3_features_res7\n", + "]\n", + "\n", + "\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1041,10 +1015,10 @@ } ], "source": [ - "#summary statistics in malasya\n", + "# summary statistics in malasya\n", "start_time = time.time()\n", - "hexbin_generator = generate_h3_features(user_data_polygon['geometry'][1], 8)\n", - "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n", + "hexbin_generator = generate_h3_features(user_data_polygon[\"geometry\"][1], 8)\n", + "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n", "# Outputs hexbin feautres with additional properties: \"population_sum\": \n", "summ_stats_h3 = gen_zonal_stats(\n", " hexbin_generator,\n", @@ -1070,10 +1044,10 @@ } ], "source": [ - "#summary statistics in test geometry - adm level 3\n", + "# summary statistics in test geometry - adm level 3\n", "start_time = time.time()\n", "hexbin_generator = generate_h3_features(polygon, 8)\n", - "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n", + "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n", "# Outputs hexbin feautres with additional properties: \"population_sum\": \n", "summ_stats_h3 = gen_zonal_stats(\n", " hexbin_generator,\n", @@ -1081,7 +1055,7 @@ " stats=\"median std\",\n", " prefix=\"wfbl_mmyr_cotton\",\n", " geojson_out=True,\n", - " all_touched=True\n", + " all_touched=True,\n", ")\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] @@ -1100,10 +1074,10 @@ } ], "source": [ - "#summary statistics in test geometry - adm level 3\n", + "# summary statistics in test geometry - adm level 3\n", "start_time = time.time()\n", "hexbin_generator = generate_h3_features(polygon, 4)\n", - "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n", + "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n", "# Outputs hexbin feautres with additional properties: \"population_sum\": \n", "summ_stats_h3_res4 = gen_zonal_stats(\n", " hexbin_generator,\n", @@ -1129,10 +1103,10 @@ } ], "source": [ - "#summary statistics in test geometry - adm level 3\n", + "# summary statistics in test geometry - adm level 3\n", "start_time = time.time()\n", "hexbin_generator = generate_h3_features(polygon, 6)\n", - "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n", + "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n", "# Outputs hexbin feautres with additional properties: \"population_sum\": \n", "summ_stats_h3_res6 = gen_zonal_stats(\n", " hexbin_generator,\n", @@ -1158,10 +1132,10 @@ } ], "source": [ - "#summary statistics in test geometry - adm level 3\n", + "# summary statistics in test geometry - adm level 3\n", "start_time = time.time()\n", "hexbin_generator = 
generate_h3_features(polygon, 5)\n", - "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n", + "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n", "# Outputs hexbin feautres with additional properties: \"population_sum\": \n", "summ_stats_h3_res5 = gen_zonal_stats(\n", " hexbin_generator,\n", @@ -1187,14 +1161,11 @@ } ], "source": [ - "#summary statistics in test geometry - adm level 3\n", + "# summary statistics in test geometry - adm level 3\n", "start_time = time.time()\n", "hexbin_generator = generate_h3_features(polygon, 8)\n", - "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n", - "zs_h3= zonal_stats(\n", - " hexbin_generator,\n", - " raster_path,\n", - " stats=\"median\")\n", + "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n", + "zs_h3 = zonal_stats(hexbin_generator, raster_path, stats=\"median\")\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1212,17 +1183,17 @@ } ], "source": [ - "#summary statistics in test geometry - adm level 3\n", + "# summary statistics in test geometry - adm level 3\n", "start_time = time.time()\n", "hexbin_generator = generate_h3_features(polygon, 6)\n", - "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n", + "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n", "gpq_stats_h3_res6 = gen_point_query(\n", " hexbin_generator,\n", " raster_path,\n", - " interpolate = 'nearest',\n", - " property_name = 'bl_gpq_',\n", + " interpolate=\"nearest\",\n", + " property_name=\"bl_gpq_\",\n", " geojson_out=True,\n", - " )\n", + ")\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1251,7 +1222,7 @@ "# check features of basins worldwide\n", "for generator in summ_stats_h3:\n", " for feature in generator:\n", - " print(feature['properties']['m_max'])\n", + " print(feature[\"properties\"][\"m_max\"])\n", " break" ] }, @@ -1345,7 +1316,6 @@ } ], "source": [ - "\n", "for feature in gpq_stats_h3_res6:\n", " print(feature)\n", " break" @@ -1365,15 +1335,15 @@ } ], "source": [ - "#generate a dataframe with the elements\n", + "# generate a dataframe with the elements\n", "start_time = time.time()\n", - "h3_gdf_wfbl = pd.DataFrame([],columns=['h3_id', 'wfbl_mmyr_cotton_median', 'geometry'])\n", + "h3_gdf_wfbl = pd.DataFrame([], columns=[\"h3_id\", \"wfbl_mmyr_cotton_median\", \"geometry\"])\n", "for feature in summ_stats_h3:\n", - " h3_gdf_wfbl.loc[len(h3_gdf_wfbl)]=[\n", - " feature['properties']['hexid'],\n", - " feature['properties']['wfbl_mmyr_cottonmedian'],\n", - " shape(feature['geometry'])\n", - " ]\n", + " h3_gdf_wfbl.loc[len(h3_gdf_wfbl)] = [\n", + " feature[\"properties\"][\"hexid\"],\n", + " feature[\"properties\"][\"wfbl_mmyr_cottonmedian\"],\n", + " shape(feature[\"geometry\"]),\n", + " ]\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1391,15 +1361,15 @@ } ], "source": [ - "#generate a dataframe with the elements\n", + "# generate a dataframe with the elements\n", "start_time = time.time()\n", - "h3_gdf_wfbl_res4 = pd.DataFrame([],columns=['h3_id', 'wfbl_mmyr_cotton_median', 'geometry'])\n", + "h3_gdf_wfbl_res4 = pd.DataFrame([], columns=[\"h3_id\", \"wfbl_mmyr_cotton_median\", \"geometry\"])\n", "for feature in summ_stats_h3_res4:\n", - " h3_gdf_wfbl_res4.loc[len(h3_gdf_wfbl_res4)]=[\n", - " feature['properties']['hexid'],\n", - " feature['properties']['wfbl_mmyr_cottonmedian'],\n", - " shape(feature['geometry'])\n", - " ]\n", + " h3_gdf_wfbl_res4.loc[len(h3_gdf_wfbl_res4)] = [\n", + " 
feature[\"properties\"][\"hexid\"],\n", + " feature[\"properties\"][\"wfbl_mmyr_cottonmedian\"],\n", + " shape(feature[\"geometry\"]),\n", + " ]\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1417,15 +1387,15 @@ } ], "source": [ - "#generate a dataframe with the elements\n", + "# generate a dataframe with the elements\n", "start_time = time.time()\n", - "h3_gdf_wfbl_res6 = pd.DataFrame([],columns=['h3_id', 'wfbl_mmyr_cotton_median', 'geometry'])\n", + "h3_gdf_wfbl_res6 = pd.DataFrame([], columns=[\"h3_id\", \"wfbl_mmyr_cotton_median\", \"geometry\"])\n", "for feature in summ_stats_h3_res6:\n", - " h3_gdf_wfbl_res6.loc[len(h3_gdf_wfbl_res6)]=[\n", - " feature['properties']['hexid'],\n", - " feature['properties']['wfbl_mmyr_cottonmedian'],\n", - " shape(feature['geometry'])\n", - " ]\n", + " h3_gdf_wfbl_res6.loc[len(h3_gdf_wfbl_res6)] = [\n", + " feature[\"properties\"][\"hexid\"],\n", + " feature[\"properties\"][\"wfbl_mmyr_cottonmedian\"],\n", + " shape(feature[\"geometry\"]),\n", + " ]\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1443,15 +1413,15 @@ } ], "source": [ - "#generate a dataframe with the elements\n", + "# generate a dataframe with the elements\n", "start_time = time.time()\n", - "h3_gdf_wfbl_res5 = pd.DataFrame([],columns=['h3_id', 'wfbl_mmyr_cotton_median', 'geometry'])\n", + "h3_gdf_wfbl_res5 = pd.DataFrame([], columns=[\"h3_id\", \"wfbl_mmyr_cotton_median\", \"geometry\"])\n", "for feature in summ_stats_h3_res5:\n", - " h3_gdf_wfbl_res5.loc[len(h3_gdf_wfbl_res5)]=[\n", - " feature['properties']['hexid'],\n", - " feature['properties']['wfbl_mmyr_cottonmedian'],\n", - " shape(feature['geometry'])\n", - " ]\n", + " h3_gdf_wfbl_res5.loc[len(h3_gdf_wfbl_res5)] = [\n", + " feature[\"properties\"][\"hexid\"],\n", + " feature[\"properties\"][\"wfbl_mmyr_cottonmedian\"],\n", + " shape(feature[\"geometry\"]),\n", + " ]\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1476,12 +1446,12 @@ "for generator in summ_stats_h3:\n", " for feature in generator:\n", " element = {\n", - " 'max':feature['properties']['m_max'],\n", - " 'hexId':feature['properties']['hexid'], \n", + " \"max\": feature[\"properties\"][\"m_max\"],\n", + " \"hexId\": feature[\"properties\"][\"hexid\"],\n", " }\n", " array_res1.append(element)\n", - " \n", - "print(\"--- %s seconds ---\" % (time.time() - start_time)) " + "\n", + "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, { @@ -1505,12 +1475,12 @@ "for generator in summ_stats_h3_res5:\n", " for feature in generator:\n", " element = {\n", - " 'max':feature['properties']['m_max'],\n", - " 'hexId':feature['properties']['hexid'], \n", + " \"max\": feature[\"properties\"][\"m_max\"],\n", + " \"hexId\": feature[\"properties\"][\"hexid\"],\n", " }\n", " array_res5.append(element)\n", - " \n", - "print(\"--- %s seconds ---\" % (time.time() - start_time))\n" + "\n", + "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, { @@ -1537,20 +1507,19 @@ } ], "source": [ - "\n", "## generate json for res1\n", "# check features of basins worldwide\n", "start_time = time.time()\n", "array_res7 = []\n", "for generator in summ_stats_h3_res7:\n", " for feature in generator:\n", - " if feature['properties']['m_max'] !=0:\n", + " if feature[\"properties\"][\"m_max\"] != 0:\n", " element = {\n", - " 'max':feature['properties']['m_max'],\n", - " 'hexId':feature['properties']['hexid'], \n", + " \"max\": feature[\"properties\"][\"m_max\"],\n", + " \"hexId\": 
feature[\"properties\"][\"hexid\"],\n", " }\n", " array_res7.append(element)\n", - " \n", + "\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1569,7 +1538,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open('./array_res4_v3.json', 'w') as f:\n", + "with open(\"./array_res4_v3.json\", \"w\") as f:\n", " json.dump(array_res1, f)" ] }, @@ -2599,7 +2568,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open('./data_res5_v2.json', 'r') as f:\n", + "with open(\"./data_res5_v2.json\", \"r\") as f:\n", " res_5 = json.load(f)" ] }, @@ -3639,7 +3608,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open('./data_res4_4.json', 'r') as f:\n", + "with open(\"./data_res4_4.json\", \"r\") as f:\n", " res_4 = json.load(f)" ] }, @@ -3650,8 +3619,8 @@ "outputs": [], "source": [ "for row in res_4:\n", - " #print(row['hexId'])\n", - " row['hexId'] = list(h3.h3_to_children(row['hexId'], 5))" + " # print(row['hexId'])\n", + " row[\"hexId\"] = list(h3.h3_to_children(row[\"hexId\"], 5))" ] }, { @@ -3660,7 +3629,7 @@ "metadata": {}, "outputs": [], "source": [ - "res_4_5 = res_4.insert(0,res_5)" + "res_4_5 = res_4.insert(0, res_5)" ] }, { @@ -3669,8 +3638,8 @@ "metadata": {}, "outputs": [], "source": [ - "with open('./data_res4_5.json', 'w') as f:\n", - " json.dump(res_4,f)" + "with open(\"./data_res4_5.json\", \"w\") as f:\n", + " json.dump(res_4, f)" ] }, { @@ -3688,9 +3657,9 @@ "metadata": {}, "outputs": [], "source": [ - "#export \n", - "with open('./data_res4_5_joined.json', 'w') as f:\n", - " json.dump(res_join,f)" + "# export\n", + "with open(\"./data_res4_5_joined.json\", \"w\") as f:\n", + " json.dump(res_join, f)" ] }, { @@ -11719,8 +11688,7 @@ "metadata": {}, "outputs": [], "source": [ - "res_join_noNull = [el for el in res_join if el['max'] != 0]\n", - " " + "res_join_noNull = [el for el in res_join if el[\"max\"] != 0]" ] }, { @@ -11729,9 +11697,9 @@ "metadata": {}, "outputs": [], "source": [ - "#export \n", - "with open('./data_res4_5_joined_noNull.json', 'w') as f:\n", - " json.dump(res_join_noNull,f)\n" + "# export\n", + "with open(\"./data_res4_5_joined_noNull.json\", \"w\") as f:\n", + " json.dump(res_join_noNull, f)" ] }, { @@ -11740,7 +11708,7 @@ "metadata": {}, "outputs": [], "source": [ - "h3_gdf_wfbl.to_csv('../../datasets/processed/h3_summary_stats_test_india_res8_att.csv')" + "h3_gdf_wfbl.to_csv(\"../../datasets/processed/h3_summary_stats_test_india_res8_att.csv\")" ] }, { @@ -11749,7 +11717,7 @@ "metadata": {}, "outputs": [], "source": [ - "h3_gdf_wfbl_res4.to_csv('../../datasets/processed/h3_summary_stats_test_india_res4.csv')" + "h3_gdf_wfbl_res4.to_csv(\"../../datasets/processed/h3_summary_stats_test_india_res4.csv\")" ] }, { @@ -11758,7 +11726,7 @@ "metadata": {}, "outputs": [], "source": [ - "h3_gdf_wfbl_res6.to_csv('../../datasets/processed/h3_summary_stats_test_india_res6.csv')" + "h3_gdf_wfbl_res6.to_csv(\"../../datasets/processed/h3_summary_stats_test_india_res6.csv\")" ] }, { @@ -11767,7 +11735,7 @@ "metadata": {}, "outputs": [], "source": [ - "h3_gdf_wfbl_res5.to_csv('../../datasets/processed/h3_summary_stats_test_india_res5.csv')" + "h3_gdf_wfbl_res5.to_csv(\"../../datasets/processed/h3_summary_stats_test_india_res5.csv\")" ] }, { @@ -11810,7 +11778,7 @@ "# percentage of cells with value zero at varius index resolutions\n", "\n", "msg_ = \"Percentage of cells with value zero at resolution {}: {} %\"\n", - "perc_hexes_zeros = 100 * (len(h3_gdf_wfbl)- len(h3_gdf_wfbl_noNan)) / len(h3_gdf_wfbl)\n", + "perc_hexes_zeros = 100 * 
(len(h3_gdf_wfbl) - len(h3_gdf_wfbl_noNan)) / len(h3_gdf_wfbl)\n", "print(msg_.format(8, round(perc_hexes_zeros, 2)))" ] }, @@ -11820,8 +11788,8 @@ "metadata": {}, "outputs": [], "source": [ - "#we can interpolate for visualization options\n", - "df_test = h3_gdf_wfbl.interpolate(method='nearest')" + "# we can interpolate for visualization options\n", + "df_test = h3_gdf_wfbl.interpolate(method=\"nearest\")" ] }, { @@ -11972,7 +11940,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_test.to_csv('../../datasets/processed/h3_summary_stats_test_india_interpolated.csv')" + "df_test.to_csv(\"../../datasets/processed/h3_summary_stats_test_india_interpolated.csv\")" ] }, { @@ -12088,7 +12056,7 @@ "metadata": {}, "outputs": [], "source": [ - "h3_gdf_wfbl.insert(3, 'nearest_geometry', None)" + "h3_gdf_wfbl.insert(3, \"nearest_geometry\", None)" ] }, { @@ -12097,8 +12065,8 @@ "metadata": {}, "outputs": [], "source": [ - "points = [row.geometry. centroid for index, row in h3_gdf_wfbl.iterrows() ]\n", - "h3_gdf_wfbl['points']=points" + "points = [row.geometry.centroid for index, row in h3_gdf_wfbl.iterrows()]\n", + "h3_gdf_wfbl[\"points\"] = points" ] }, { @@ -12318,7 +12286,7 @@ } ], "source": [ - "#drop nan values\n", + "# drop nan values\n", "h3_gdf_wfbl_noNan = h3_gdf_wfbl.dropna()\n", "h3_gdf_wfbl_noNan.head()" ] @@ -12329,13 +12297,12 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "for index, row in h3_gdf_wfbl.iterrows():\n", " point = row.points\n", - " #multipoint = h3_gdf_wfbl.drop(index, axis=0).points.unary_union\n", + " # multipoint = h3_gdf_wfbl.drop(index, axis=0).points.unary_union\n", " multipoint = h3_gdf_wfbl_noNan.points.unary_union\n", " queried_geom, nearest_geom = nearest_points(point, multipoint)\n", - " h3_gdf_wfbl.loc[index, 'nearest_geometry'] = nearest_geom" + " h3_gdf_wfbl.loc[index, \"nearest_geometry\"] = nearest_geom" ] }, { @@ -12455,9 +12422,10 @@ "outputs": [], "source": [ "for index, row in h3_gdf_wfbl.iterrows():\n", - " nearest_value = h3_gdf_wfbl_noNan[h3_gdf_wfbl_noNan['points']== h3_gdf_wfbl.iloc[index]['nearest_geometry']].iloc[0]['wfbl_mmyr_cotton_median']\n", - " h3_gdf_wfbl.loc[index, 'wfbl_mmyr_cotton_median'] = nearest_value\n", - " " + " nearest_value = h3_gdf_wfbl_noNan[\n", + " h3_gdf_wfbl_noNan[\"points\"] == h3_gdf_wfbl.iloc[index][\"nearest_geometry\"]\n", + " ].iloc[0][\"wfbl_mmyr_cotton_median\"]\n", + " h3_gdf_wfbl.loc[index, \"wfbl_mmyr_cotton_median\"] = nearest_value" ] }, { @@ -12645,9 +12613,9 @@ "metadata": {}, "outputs": [], "source": [ - "#set the hexagon as geometry and export\n", - "h3_gdf_wfbl = h3_gdf_wfbl.set_geometry('geometry')[['h3_id', 'wfbl_mmyr_cotton_median', 'geometry']]\n", - "h3_gdf_wfbl.to_csv('../../datasets/processed/h3_summary_stats_test_india_interpolated_nearest.csv')" + "# set the hexagon as geometry and export\n", + "h3_gdf_wfbl = h3_gdf_wfbl.set_geometry(\"geometry\")[[\"h3_id\", \"wfbl_mmyr_cotton_median\", \"geometry\"]]\n", + "h3_gdf_wfbl.to_csv(\"../../datasets/processed/h3_summary_stats_test_india_interpolated_nearest.csv\")" ] }, { diff --git a/data/notebooks/Lab/0_5_Area_distribution.ipynb b/data/notebooks/Lab/0_5_Area_distribution.ipynb index 51faea0dc..fd835c6c9 100644 --- a/data/notebooks/Lab/0_5_Area_distribution.ipynb +++ b/data/notebooks/Lab/0_5_Area_distribution.ipynb @@ -56,13 +56,13 @@ "outputs": [], "source": [ "## import libraries\n", - "import geopandas as gpd\n", "import time\n", "\n", - "from rasterstats import zonal_stats\n", + "import geopandas as gpd\n", + "import 
matplotlib.pyplot as plt\n", "import rasterio as rio\n", "import rasterio.plot\n", - "import matplotlib.pyplot as plt" + "from rasterstats import zonal_stats" ] }, { @@ -208,7 +208,7 @@ "source": [ "## import user data\n", "## projection of user data is epsg:4326\n", - "input_data = gpd.read_file('../../datasets/processed/user_data/located_lg_data_polygon_v2.shp') \n", + "input_data = gpd.read_file(\"../../datasets/processed/user_data/located_lg_data_polygon_v2.shp\")\n", "input_data.head()" ] }, @@ -229,7 +229,7 @@ "source": [ "## check the commodities materials - we will need to generate a distribution map for each of the commodities.\n", "\n", - "print(f'The uniques commodities from the user data are:', set(input_data['Material']))" + "print(\"The uniques commodities from the user data are:\", set(input_data[\"Material\"]))" ] }, { @@ -311,7 +311,11 @@ } ], "source": [ - "test_location = input_data.loc[(input_data['Material']=='Cotton') & (input_data['Country']=='India') & ((input_data['Volume']==745) )]\n", + "test_location = input_data.loc[\n", + " (input_data[\"Material\"] == \"Cotton\")\n", + " & (input_data[\"Country\"] == \"India\")\n", + " & ((input_data[\"Volume\"] == 745))\n", + "]\n", "test_location" ] }, @@ -331,7 +335,7 @@ ], "source": [ "test_location = test_location.set_crs(\"EPSG:4326\")\n", - "print(f'projection of user data is: {test_location.crs}')" + "print(f\"projection of user data is: {test_location.crs}\")" ] }, { @@ -341,7 +345,7 @@ "metadata": {}, "outputs": [], "source": [ - "#reproject to epsg3857\n", + "# reproject to epsg3857\n", "test_location = test_location.to_crs(\"EPSG:3857\")" ] }, @@ -352,7 +356,9 @@ "metadata": {}, "outputs": [], "source": [ - "test_location.to_file('../../datasets/raw/probability_map/test_location_epsg3857.shp', driver='ESRI Shapefile')" + "test_location.to_file(\n", + " \"../../datasets/raw/probability_map/test_location_epsg3857.shp\", driver=\"ESRI Shapefile\"\n", + ")" ] }, { @@ -394,8 +400,8 @@ "metadata": {}, "outputs": [], "source": [ - "yield_cotton = '../../datasets/raw/crop_data/cotton_YieldPerHectare.tif'\n", - "harvest_portion_cotton = '../../datasets/raw/crop_data/cotton_HarvestedAreaFraction.tif'\n" + "yield_cotton = \"../../datasets/raw/crop_data/cotton_YieldPerHectare.tif\"\n", + "harvest_portion_cotton = \"../../datasets/raw/crop_data/cotton_HarvestedAreaFraction.tif\"" ] }, { @@ -568,7 +574,7 @@ } ], "source": [ - "# reproject yield \n", + "# reproject yield\n", "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 12051.131160772875 12051.131160772875 -r near -of GTiff ../../datasets/raw/crop_data/cotton_YieldPerHectare.tif ../../datasets/raw/crop_data/cotton_YieldPerHectare_epsg3857.tif" ] }, @@ -657,8 +663,8 @@ } ], "source": [ - "#check infor of the reprojected raster\n", - "!gdalinfo -stats -hist '../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857.tif'\n" + "# check infor of the reprojected raster\n", + "!gdalinfo -stats -hist '../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857.tif'" ] }, { @@ -808,8 +814,7 @@ "source": [ "## calculate pixel area based on pixel size\n", "pixel_area = 12051.131160772874864 * 12051.131160772874864\n", - "print(f'The pixel area is {pixel_area} m2')\n", - "\n" + "print(f\"The pixel area is {pixel_area} m2\")" ] }, { @@ -906,7 +911,7 @@ } ], "source": [ - "#generate raster with pixel area raster\n", + "# generate raster with pixel area raster\n", "# reclasifies the raster into 0 and pixel area being the pixel area just on thise locations with harvest area 
fraction\n", "!gdal_calc.py -A '../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857.tif' --outfile='../../datasets/raw/probability_map/pixel_area_cotton_raster_epsg3857.tif' --calc=\"(A > 0) * 145229762.254151\"" ] @@ -1035,13 +1040,12 @@ } ], "source": [ - "#zonal stats in india to get the sum of all fraction harvest area\n", - "total_harves_area_cotton = '../../datasets/raw/probability_map/area_total_cotton_raster_epsg3857.tif'\n", + "# zonal stats in india to get the sum of all fraction harvest area\n", + "total_harves_area_cotton = (\n", + " \"../../datasets/raw/probability_map/area_total_cotton_raster_epsg3857.tif\"\n", + ")\n", "start_time = time.time()\n", - "zs_india_test = zonal_stats(\n", - " test_location,\n", - " total_harves_area_cotton,\n", - " stats=\"sum\")\n", + "zs_india_test = zonal_stats(test_location, total_harves_area_cotton, stats=\"sum\")\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1060,7 +1064,7 @@ } ], "source": [ - "print(f' The total cotton harvest area in india is :', {zs_india_test[0]['sum']}, 'm2')" + "print(\" The total cotton harvest area in india is :\", {zs_india_test[0][\"sum\"]}, \"m2\")" ] }, { @@ -1137,7 +1141,7 @@ ], "source": [ "## ad field to gdf\n", - "test_location['Total_af'] = zs_india_test[0]['sum']\n", + "test_location[\"Total_af\"] = zs_india_test[0][\"sum\"]\n", "test_location" ] }, @@ -1148,7 +1152,9 @@ "metadata": {}, "outputs": [], "source": [ - "test_location.to_file('../../datasets/raw/probability_map/test_location_epsg3857.shp', driver='ESRI Shapefile')" + "test_location.to_file(\n", + " \"../../datasets/raw/probability_map/test_location_epsg3857.shp\", driver=\"ESRI Shapefile\"\n", + ")" ] }, { @@ -1167,7 +1173,7 @@ ], "source": [ "## generate a raster with same extent as the other ones with this total area fraction value\n", - "!gdal_rasterize -l test_location_epsg3857 -a Total_af -tr 12051.131160772875 12051.131160772875 -a_nodata 0.0 -te -20037508.3428 -242486969.8524 20032502.7668 191642979.0833 -ot Float32 -of GTiff '../../datasets/raw/probability_map/test_location_epsg3857.shp' '../../datasets/raw/probability_map/test_location_raster_total_af.tif'\n" + "!gdal_rasterize -l test_location_epsg3857 -a Total_af -tr 12051.131160772875 12051.131160772875 -a_nodata 0.0 -te -20037508.3428 -242486969.8524 20032502.7668 191642979.0833 -ot Float32 -of GTiff '../../datasets/raw/probability_map/test_location_epsg3857.shp' '../../datasets/raw/probability_map/test_location_raster_total_af.tif'" ] }, { @@ -1406,13 +1412,22 @@ } ], "source": [ - "with rio.open('../../datasets/processed/probability_map/purchase_area_distribution_cotton_epsg3857.tif') as src:\n", + "with rio.open(\n", + " \"../../datasets/processed/probability_map/purchase_area_distribution_cotton_epsg3857.tif\"\n", + ") as src:\n", " image_array = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((695174.093781,4.255931e+06))\n", - " ax.set_xlim((7.582124e+06,1.084202e+07))\n", - " rio.plot.show(image_array, vmin=2.1023777208029e-06, vmax=1.0740570812899e-05, cmap='Oranges', ax=ax, transform=src.transform)\n", - " ax.set_title('Geospatial responsibility - test location')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((695174.093781, 4.255931e06))\n", + " ax.set_xlim((7.582124e06, 1.084202e07))\n", + " rio.plot.show(\n", + " image_array,\n", + " vmin=2.1023777208029e-06,\n", + " vmax=1.0740570812899e-05,\n", + " cmap=\"Oranges\",\n", + " ax=ax,\n", + " transform=src.transform,\n", + " 
)\n", + " ax.set_title(\"Geospatial responsibility - test location\")" ] }, { diff --git a/data/notebooks/Lab/0_6_test_h3ronpy_fg.ipynb b/data/notebooks/Lab/0_6_test_h3ronpy_fg.ipynb index 4471d8a17..8ff425f92 100644 --- a/data/notebooks/Lab/0_6_test_h3ronpy_fg.ipynb +++ b/data/notebooks/Lab/0_6_test_h3ronpy_fg.ipynb @@ -13,20 +13,16 @@ "cell_type": "code", "execution_count": 30, "source": [ + "import os\n", + "\n", + "import geopandas as gpd\n", "import h3\n", - "from h3ronpy import raster\n", + "import pandas as pd\n", "import rasterio as rio\n", "import rasterio.plot\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from rasterstats import gen_zonal_stats, gen_point_query\n", - "from rasterstats import zonal_stats\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "import h3\n", - "import json\n", - "import os\n", - "from shapely.geometry import shape, mapping, box, Point, LinearRing, Polygon\n" + "from h3ronpy import raster\n", + "from rasterstats import gen_point_query\n", + "from shapely.geometry import Point, Polygon, box, mapping" ], "outputs": [], "metadata": {} @@ -35,9 +31,11 @@ "cell_type": "code", "execution_count": 4, "source": [ - "test_raster = '../../data/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m.tif'\n", + "test_raster = (\n", + " \"../../data/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m.tif\"\n", + ")\n", "\n", - "test_area = (-10,40,0,50)" + "test_area = (-10, 40, 0, 50)" ], "outputs": [], "metadata": {} @@ -51,10 +49,16 @@ " transform = rio.windows.transform(window, src.transform)\n", " print(src.profile)\n", " rio.plot.show(src.read(window=window, masked=True))\n", - " gdf = raster.raster_to_geodataframe(src.read(1, window=window), transform, h3_resolution=4, nodata_value=src.profile['nodata'], compacted=False)\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1, window=window),\n", + " transform,\n", + " h3_resolution=4,\n", + " nodata_value=src.profile[\"nodata\"],\n", + " compacted=False,\n", + " )\n", "\n", - "gdf.plot('value')\n", - "#gdf['h3index'] = gdf['h3index'].apply(hex)\n", + "gdf.plot(\"value\")\n", + "# gdf['h3index'] = gdf['h3index'].apply(hex)\n", "gdf.head()" ], "outputs": [ @@ -184,9 +188,11 @@ "idx_int = [int(h, 16) for h in idx]\n", "geoms = h3.h3_set_to_multi_polygon(idx, geo_json=True)\n", "\n", - "df = pd.DataFrame({'h3index':idx_int, 'value':gen_point_query(pts, test_raster, interpolate='nearest')})\n", + "df = pd.DataFrame(\n", + " {\"h3index\": idx_int, \"value\": gen_point_query(pts, test_raster, interpolate=\"nearest\")}\n", + ")\n", "df = df.dropna()\n", - "df.plot('value')\n", + "df.plot(\"value\")\n", "df.head()" ], "outputs": [ @@ -277,7 +283,7 @@ "cell_type": "code", "execution_count": 7, "source": [ - "j = gdf.set_index('h3index').join(df.set_index('h3index'), rsuffix='t')\n", + "j = gdf.set_index(\"h3index\").join(df.set_index(\"h3index\"), rsuffix=\"t\")\n", "j" ], "outputs": [ @@ -443,7 +449,9 @@ "execution_count": 135, "source": [ "%%timeit\n", - "pd.DataFrame({'h3index':idx_int, 'value':gen_point_query(pts, test_raster, interpolate='nearest')})" + "pd.DataFrame(\n", + " {\"h3index\": idx_int, \"value\": gen_point_query(pts, test_raster, interpolate=\"nearest\")}\n", + ")" ], "outputs": [ { @@ -462,7 +470,13 @@ "source": [ "%%timeit\n", "with rio.open(test_raster) as src:\n", - " raster.to_dataframe(src.read(1, window=window), transform, h3_resolution=4, nodata_value=src.profile['nodata'], compacted=False)" + " 
raster.to_dataframe(\n", + " src.read(1, window=window),\n", + " transform,\n", + " h3_resolution=4,\n", + " nodata_value=src.profile[\"nodata\"],\n", + " compacted=False,\n", + " )" ], "outputs": [ { @@ -484,10 +498,11 @@ " for ji, window in src.block_windows():\n", " transform = rio.windows.transform(window, src.transform)\n", " arr = src.read(1, window=window)\n", - " \n", - " df = h3ronpy.raster.raster_to_geodataframe(arr, transform, 4, nodata_value=src.profile['nodata'], compacted=False)\n", - " dfs.append(df)\n", - "\n" + "\n", + " df = h3ronpy.raster.raster_to_geodataframe(\n", + " arr, transform, 4, nodata_value=src.profile[\"nodata\"], compacted=False\n", + " )\n", + " dfs.append(df)" ], "outputs": [], "metadata": {} @@ -496,7 +511,7 @@ "cell_type": "code", "execution_count": 147, "source": [ - "l = [i for df in dfs for i in df['h3index']]\n", + "l = [i for df in dfs for i in df[\"h3index\"]]\n", "print(len(l))\n", "print(len(set(l)))" ], @@ -516,7 +531,7 @@ "cell_type": "code", "execution_count": 150, "source": [ - "pd.concat(dfs).plot('value')" + "pd.concat(dfs).plot(\"value\")" ], "outputs": [ { @@ -549,12 +564,15 @@ "execution_count": 23, "source": [ "from math import ceil\n", + "\n", "BLOCKSIZE = 512\n", + "\n", + "\n", "def gen_raster_h3(raster_list, h3_res):\n", " \"\"\"Convert a list of identically formatted rasters to H3\n", - " \n", + "\n", " A function for efficiently turning a set of rasters into an H3 table.\n", - " \n", + "\n", " Takes a list of 1-band rasters with identical projection/transform.\n", " Reads each raster in blocks, and converts to h3 (nearest to centroid).\n", " Yields a dataframe with an h3index and one column for each raster's value.\n", @@ -562,35 +580,36 @@ " Args:\n", " raster_list: list of paths to rasters\n", " h3_res: h3 resolution to use for resampling\n", - " \n", + "\n", " Yields:\n", - " A Pandas dataframe for each raster block (usu. 512x512) with an \n", + " A Pandas dataframe for each raster block (usu. 
512x512) with an\n", " h3index and one column for each raster's value.\n", " \"\"\"\n", " readers = [rio.open(r) for r in raster_list]\n", " names = [os.path.splitext(os.path.basename(r))[0].lower() for r in raster_list]\n", - " \n", + "\n", " base = readers[0]\n", - " for j in range(ceil(base.height/BLOCKSIZE)):\n", - " for i in range(ceil(base.width/BLOCKSIZE)):\n", - " window = rio.windows.Window(i*BLOCKSIZE, j*BLOCKSIZE, BLOCKSIZE, BLOCKSIZE)\n", + " for j in range(ceil(base.height / BLOCKSIZE)):\n", + " for i in range(ceil(base.width / BLOCKSIZE)):\n", + " window = rio.windows.Window(i * BLOCKSIZE, j * BLOCKSIZE, BLOCKSIZE, BLOCKSIZE)\n", " w_transform = rio.windows.transform(window, base.transform)\n", " dfs = []\n", " for src in readers:\n", " if src.transform != base.transform:\n", " raise ValueError(\"Transforms do not match\")\n", " arr = src.read(1, window=window)\n", - " _df = raster.raster_to_dataframe(arr, w_transform, h3_res, nodata_value=src.profile['nodata'], compacted=False)\n", - " dfs.append(_df.set_index('h3index')['value'])\n", + " _df = raster.raster_to_dataframe(\n", + " arr, w_transform, h3_res, nodata_value=src.profile[\"nodata\"], compacted=False\n", + " )\n", + " dfs.append(_df.set_index(\"h3index\")[\"value\"])\n", " df = pd.concat(dfs, axis=1)\n", - " print(f'Reading block {j}, {i}: h3index count {len(df)}')\n", + " print(f\"Reading block {j}, {i}: h3index count {len(df)}\")\n", " if len(df):\n", " df.columns = names\n", " # cast h3index from int64 to hex string\n", " yield df\n", " for src in readers:\n", - " src.close()\n", - "\n" + " src.close()" ], "outputs": [], "metadata": {} @@ -600,14 +619,16 @@ "execution_count": 32, "source": [ "test_list = [\n", - " '../../data/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m.tif',\n", - " '../../data/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Cropland2000_5m.tif'\n", + " \"../../data/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m.tif\",\n", + " \"../../data/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Cropland2000_5m.tif\",\n", "]\n", - "test_dir = '../../data/seed/mapspam/spam2017v2r1_ssa_prod'\n", + "test_dir = \"../../data/seed/mapspam/spam2017v2r1_ssa_prod\"\n", "test_list2 = [os.path.join(test_dir, f) for f in os.listdir(test_dir)]\n", "\n", "h3grid = pd.concat(list(gen_raster_h3(test_list2, 4)))\n", - "h3grid = gpd.GeoDataFrame(h3grid, geometry=[Polygon(h3.h3_to_geo_boundary(h, geo_json=True)) for h in h3grid.index])\n", + "h3grid = gpd.GeoDataFrame(\n", + " h3grid, geometry=[Polygon(h3.h3_to_geo_boundary(h, geo_json=True)) for h in h3grid.index]\n", + ")\n", "h3grid.plot()" ], "outputs": [ diff --git a/data/notebooks/Lab/0_7_Calculate_bins_for_contextual_layers.ipynb b/data/notebooks/Lab/0_7_Calculate_bins_for_contextual_layers.ipynb index 312c41594..18bce9c26 100644 --- a/data/notebooks/Lab/0_7_Calculate_bins_for_contextual_layers.ipynb +++ b/data/notebooks/Lab/0_7_Calculate_bins_for_contextual_layers.ipynb @@ -7,11 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "import json\n", - "import os\n", - "\n", "import matplotlib.pyplot as plt\n", - "import numpy as np\n", "import pandas as pd\n", "from mapclassify import NaturalBreaks\n", "from psycopg import connect\n", diff --git a/data/notebooks/Lab/0_data_geocoding.ipynb b/data/notebooks/Lab/0_data_geocoding.ipynb index 115e1f67b..41dad0099 100644 --- a/data/notebooks/Lab/0_data_geocoding.ipynb +++ b/data/notebooks/Lab/0_data_geocoding.ipynb @@ -102,16 +102,11 
@@ } ], "source": [ - "import pandas as pd\n", - "import geopandas as gpd\n", - "import numpy as np\n", "import os\n", - "import shapely.wkt\n", - "import folium\n", "\n", - "from collections import OrderedDict\n", - "import requests\n", - "import time\n", + "import folium\n", + "import geopandas as gpd\n", + "import pandas as pd\n", "from shapely.geometry import Point" ] }, @@ -130,25 +125,27 @@ "metadata": {}, "outputs": [], "source": [ - "def visualise(data, z=3, loc=[0,0], color=\"#f69\", tooltip_property=None):\n", + "def visualise(data, z=3, loc=[0, 0], color=\"#f69\", tooltip_property=None):\n", " \"\"\"Maps a list of geojson features\"\"\"\n", " # Adapted from docs: https://geopandas.readthedocs.io/en/latest/gallery/polygon_plotting_with_folium.html\n", "\n", - " m = folium.Map(location=loc, zoom_start=z, tiles='CartoDB positron')\n", - " \n", + " m = folium.Map(location=loc, zoom_start=z, tiles=\"CartoDB positron\")\n", + "\n", " for d in data:\n", - " geo_j = folium.GeoJson(data=d['geometry'],\n", - " style_function=lambda x: {\n", - " 'fillColor': color,\n", - " 'color': color,\n", - " 'weight': 1.5,\n", - " })\n", + " geo_j = folium.GeoJson(\n", + " data=d[\"geometry\"],\n", + " style_function=lambda x: {\n", + " \"fillColor\": color,\n", + " \"color\": color,\n", + " \"weight\": 1.5,\n", + " },\n", + " )\n", "\n", " ## No popup yet\n", " if tooltip_property:\n", - " prop = d['properties'].get(tooltip_property, '')\n", + " prop = d[\"properties\"].get(tooltip_property, \"\")\n", " folium.Popup(str(prop)).add_to(geo_j)\n", - " \n", + "\n", " geo_j.add_to(m)\n", "\n", " return m" @@ -184,7 +181,7 @@ } ], "source": [ - "point = GeolocateAddress(query='india')\n", + "point = GeolocateAddress(query=\"india\")\n", "point.point" ] }, @@ -247,8 +244,8 @@ } ], "source": [ - "input_dir = '../../data/raw/'\n", - "files = [input_dir+f for f in os.listdir(input_dir) if '.csv' in f]\n", + "input_dir = \"../../data/raw/\"\n", + "files = [input_dir + f for f in os.listdir(input_dir) if \".csv\" in f]\n", "files" ] }, @@ -403,7 +400,7 @@ ], "source": [ "# check unique field in location type\n", - "set(input_data['Location type'])" + "set(input_data[\"Location type\"])" ] }, { @@ -412,10 +409,14 @@ "metadata": {}, "outputs": [], "source": [ - "unknown_locations = input_data[input_data['Location type']=='Unknown']\n", - "os_facility = input_data[input_data['Location type']=='Origin supplier facility (warehouse, silo, mill, etc.)']\n", - "oc = input_data[input_data['Location type']=='Origin country']\n", - "os_production = input_data[input_data['Location type']=='Point of production (farm, ranch, plantation, etc.)']" + "unknown_locations = input_data[input_data[\"Location type\"] == \"Unknown\"]\n", + "os_facility = input_data[\n", + " input_data[\"Location type\"] == \"Origin supplier facility (warehouse, silo, mill, etc.)\"\n", + "]\n", + "oc = input_data[input_data[\"Location type\"] == \"Origin country\"]\n", + "os_production = input_data[\n", + " input_data[\"Location type\"] == \"Point of production (farm, ranch, plantation, etc.)\"\n", + "]" ] }, { @@ -438,12 +439,14 @@ } ], "source": [ - "print(f'The total length of the input data file is: {len(input_data)} rows/locations.\\n')\n", - "print(f\"\"\"For those locations, there are {len(unknown_locations)} unknown locations,\n", + "print(f\"The total length of the input data file is: {len(input_data)} rows/locations.\\n\")\n", + "print(\n", + " f\"\"\"For those locations, there are {len(unknown_locations)} unknown locations,\n", 
"{len(os_facility)} Origin supplier facility (warehouse, silo, mill, etc locations,\n", "{len(oc)} 'Origin country' locations and \n", "{len(os_production)} 'Point of production (farm, ranch, plantation, etc.)' locations\n", - "\"\"\")\n" + "\"\"\"\n", + ")" ] }, { @@ -591,14 +594,14 @@ " for i in range(0, len(gdf)):\n", " row = gdf.iloc[i]\n", " rowIndex = gdf.index[i]\n", - " lat = row['Latitude']\n", - " lng = row['Longitude']\n", + " lat = row[\"Latitude\"]\n", + " lng = row[\"Longitude\"]\n", " point = (lng, lat)\n", " geom = Point(point)\n", " geoms.append(geom)\n", - " #gdf.loc[rowIndex, 'Geometry_wkt'] = geom.to_wkt()\n", - " gdf.loc[rowIndex, 'Accuracy'] = 'High'\n", - " gdf['Geometry'] = geoms\n", + " # gdf.loc[rowIndex, 'Geometry_wkt'] = geom.to_wkt()\n", + " gdf.loc[rowIndex, \"Accuracy\"] = \"High\"\n", + " gdf[\"Geometry\"] = geoms\n", " return gdf" ] }, @@ -871,7 +874,7 @@ "metadata": {}, "outputs": [], "source": [ - "geolocated_gdf = geolocated_gdf.set_geometry('Geometry')" + "geolocated_gdf = geolocated_gdf.set_geometry(\"Geometry\")" ] }, { @@ -1072,6 +1075,7 @@ ], "source": [ "import json\n", + "\n", "gjson = json.loads(geolocated_gdf.to_json())\n", "gjson" ] @@ -1096,7 +1100,7 @@ } ], "source": [ - "visualise(gjson['features'], tooltip_property='Location type')" + "visualise(gjson[\"features\"], tooltip_property=\"Location type\")" ] }, { @@ -1481,7 +1485,9 @@ } ], "source": [ - "processing_facility['Full Address'] = processing_facility['Address'] + ', ' + processing_facility['Country']\n", + "processing_facility[\"Full Address\"] = (\n", + " processing_facility[\"Address\"] + \", \" + processing_facility[\"Country\"]\n", + ")\n", "processing_facility.head()" ] }, @@ -1505,7 +1511,7 @@ "source": [ "### Why could try and do an .apply() operation on the whole df\n", "\n", - "address = processing_facility.iloc[0]['Full Address']\n", + "address = processing_facility.iloc[0][\"Full Address\"]\n", "geo_loc_test = GeolocateAddress(query=address)" ] }, @@ -1571,7 +1577,7 @@ } ], "source": [ - "gdf = gpd.GeoDataFrame.from_features(geo_loc_test.polygon_json, crs='epsg:4326')\n", + "gdf = gpd.GeoDataFrame.from_features(geo_loc_test.polygon_json, crs=\"epsg:4326\")\n", "gdf" ] }, @@ -1595,8 +1601,8 @@ } ], "source": [ - "m = folium.Map(location=[0,0],tiles=\"cartodbpositron\", zoom_start=5)\n", - "folium.GeoJson(data=gdf[\"geometry\"]).add_to(m) \n", + "m = folium.Map(location=[0, 0], tiles=\"cartodbpositron\", zoom_start=5)\n", + "folium.GeoJson(data=gdf[\"geometry\"]).add_to(m)\n", "m" ] }, @@ -1747,7 +1753,7 @@ ], "source": [ "row = processing_facility.iloc[0]\n", - "adress_country = row['Country']\n", + "adress_country = row[\"Country\"]\n", "adress_country" ] }, @@ -1834,7 +1840,7 @@ } ], "source": [ - "gdf = gpd.GeoDataFrame.from_features(geo_loc_test.polygon_json, crs='epsg:4326')\n", + "gdf = gpd.GeoDataFrame.from_features(geo_loc_test.polygon_json, crs=\"epsg:4326\")\n", "gdf" ] }, @@ -1858,8 +1864,8 @@ } ], "source": [ - "m = folium.Map(location=[0,0],tiles=\"cartodbpositron\", zoom_start=3)\n", - "folium.GeoJson(data=gdf[\"geometry\"]).add_to(m) \n", + "m = folium.Map(location=[0, 0], tiles=\"cartodbpositron\", zoom_start=3)\n", + "folium.GeoJson(data=gdf[\"geometry\"]).add_to(m)\n", "m" ] }, @@ -2242,49 +2248,48 @@ "accuracy_list = []\n", "for i in range(0, len(input_data)):\n", " row = input_data.iloc[i]\n", - " if row['Location type'] == 'Unknown' or row['Location type'] =='Origin country':\n", - " country_name = row['Country']\n", + " if row[\"Location type\"] == 
\"Unknown\" or row[\"Location type\"] == \"Origin country\":\n", + " country_name = row[\"Country\"]\n", " try:\n", " geolocation = GeolocateAddress(query=country_name)\n", - " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs='epsg:4326')\n", - " geom = gdf['geometry'].iloc[0]\n", - " accuracy = 'Low'\n", + " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs=\"epsg:4326\")\n", + " geom = gdf[\"geometry\"].iloc[0]\n", + " accuracy = \"Low\"\n", " except:\n", - " print(f'Geolocation failed for {country_name}')\n", - " geom = 'None'\n", - " accuracy = 'None'\n", - " if row['Location type'] == 'Origin supplier facility (warehouse, silo, mill, etc.)':\n", + " print(f\"Geolocation failed for {country_name}\")\n", + " geom = \"None\"\n", + " accuracy = \"None\"\n", + " if row[\"Location type\"] == \"Origin supplier facility (warehouse, silo, mill, etc.)\":\n", " try:\n", - " adress_count = row['Address'] + ', ' + row['Country']\n", + " adress_count = row[\"Address\"] + \", \" + row[\"Country\"]\n", " geolocation = GeolocateAddress(query=adress_count)\n", - " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs='epsg:4326')\n", - " geom = gdf['geometry'].iloc[0]\n", - " accuracy = 'Medium'\n", + " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs=\"epsg:4326\")\n", + " geom = gdf[\"geometry\"].iloc[0]\n", + " accuracy = \"Medium\"\n", " except:\n", - " print(f'Geolocation failed for row {i}')\n", + " print(f\"Geolocation failed for row {i}\")\n", " try:\n", - " print('trying for country...')\n", - " country_name = row['Country']\n", + " print(\"trying for country...\")\n", + " country_name = row[\"Country\"]\n", " geolocation = GeolocateAddress(query=country_name)\n", - " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs='epsg:4326')\n", - " geom = gdf['geometry'].iloc[0]\n", - " accuracy = 'Low'\n", + " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs=\"epsg:4326\")\n", + " geom = gdf[\"geometry\"].iloc[0]\n", + " accuracy = \"Low\"\n", " except:\n", - " print(f'Geolocation failed for {country_name}')\n", - " geom = 'None'\n", - " accuracy= 'None'\n", - " \n", - " if row['Location type'] == 'Point of production (farm, ranch, plantation, etc.)':\n", - " lat = row['Latitude']\n", - " lng = row['Longitude']\n", - " #point = (lat, lng)\n", + " print(f\"Geolocation failed for {country_name}\")\n", + " geom = \"None\"\n", + " accuracy = \"None\"\n", + "\n", + " if row[\"Location type\"] == \"Point of production (farm, ranch, plantation, etc.)\":\n", + " lat = row[\"Latitude\"]\n", + " lng = row[\"Longitude\"]\n", + " # point = (lat, lng)\n", " point = (lng, lat)\n", " geom = Point(point)\n", - " accuracy = 'High'\n", - " \n", + " accuracy = \"High\"\n", + "\n", " geometry_list.append(geom)\n", - " accuracy_list.append(accuracy)\n", - " " + " accuracy_list.append(accuracy)" ] }, { @@ -2304,10 +2309,12 @@ } ], "source": [ - "print(f\"\"\"\n", + "print(\n", + " f\"\"\"\n", "lenght of geocoded locations: {len(geometry_list)},\n", "lenght of input data: {len(input_data)}\n", - "\"\"\")" + "\"\"\"\n", + ")" ] }, { @@ -2447,8 +2454,8 @@ } ], "source": [ - "input_data['Geometry'] = geometry_list\n", - "input_data['Accuracy'] = accuracy_list\n", + "input_data[\"Geometry\"] = geometry_list\n", + "input_data[\"Accuracy\"] = accuracy_list\n", "input_data.head()" ] }, @@ -2459,8 +2466,8 @@ "outputs": [], "source": [ "gdf.to_file(\n", - " '../Processed_data/located_lg_data_polygon.shp',\n", - " driver='ESRI 
Shapefile',\n", + " \"../Processed_data/located_lg_data_polygon.shp\",\n", + " driver=\"ESRI Shapefile\",\n", ")" ] }, @@ -2519,8 +2526,8 @@ } ], "source": [ - "#check None geometries\n", - "input_data[input_data['Geometry']=='None']" + "# check None geometries\n", + "input_data[input_data[\"Geometry\"] == \"None\"]" ] }, { @@ -2529,7 +2536,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf = gpd.GeoDataFrame(input_data, geometry='Geometry')" + "gdf = gpd.GeoDataFrame(input_data, geometry=\"Geometry\")" ] }, { @@ -2538,9 +2545,9 @@ "metadata": {}, "outputs": [], "source": [ - "gdf_polygon = gdf[gdf['Geometry'].apply(lambda x : x.type!='Point' )]\n", - "gdf_point = gdf[gdf['Geometry'].apply(lambda x : x.type=='Point' )]\n", - "gdf_polygon = gdf_polygon[gdf_polygon['Geometry'].apply(lambda x : x.type!='LineString' )]\n" + "gdf_polygon = gdf[gdf[\"Geometry\"].apply(lambda x: x.type != \"Point\")]\n", + "gdf_point = gdf[gdf[\"Geometry\"].apply(lambda x: x.type == \"Point\")]\n", + "gdf_polygon = gdf_polygon[gdf_polygon[\"Geometry\"].apply(lambda x: x.type != \"LineString\")]" ] }, { @@ -2549,8 +2556,8 @@ "metadata": {}, "outputs": [], "source": [ - "#check the linestring data retrieved\n", - "gdf_LS = gdf[gdf['Geometry'].apply(lambda x : x.type=='LineString' )]" + "# check the linestring data retrieved\n", + "gdf_LS = gdf[gdf[\"Geometry\"].apply(lambda x: x.type == \"LineString\")]" ] }, { @@ -2636,8 +2643,8 @@ "outputs": [], "source": [ "gdf_point.to_file(\n", - " '../Processed_data/located_lg_data_point_v2.shp',\n", - " driver='ESRI Shapefile',\n", + " \"../Processed_data/located_lg_data_point_v2.shp\",\n", + " driver=\"ESRI Shapefile\",\n", ")" ] }, @@ -2648,8 +2655,8 @@ "outputs": [], "source": [ "gdf_polygon.to_file(\n", - " '../Processed_data/located_lg_data_polygon_v2.shp',\n", - " driver='ESRI Shapefile',\n", + " \"../Processed_data/located_lg_data_polygon_v2.shp\",\n", + " driver=\"ESRI Shapefile\",\n", ")" ] }, diff --git a/data/notebooks/Lab/10_1_Met_v01_Water_indicator_coeficients_csv.ipynb b/data/notebooks/Lab/10_1_Met_v01_Water_indicator_coeficients_csv.ipynb index 3111a2b49..e24482059 100644 --- a/data/notebooks/Lab/10_1_Met_v01_Water_indicator_coeficients_csv.ipynb +++ b/data/notebooks/Lab/10_1_Met_v01_Water_indicator_coeficients_csv.ipynb @@ -25,7 +25,6 @@ }, "outputs": [], "source": [ - "import numpy as np\n", "import pandas as pd" ] }, @@ -190,7 +189,9 @@ "metadata": {}, "outputs": [], "source": [ - "df_long.to_csv(\"../../indicator_coefficient_importer/data/bwfp_indicator_coefficients.csv\", index=False)" + "df_long.to_csv(\n", + " \"../../indicator_coefficient_importer/data/bwfp_indicator_coefficients.csv\", index=False\n", + ")" ] } ], diff --git a/data/notebooks/Lab/10_2_kernel_impl_explorations.ipynb b/data/notebooks/Lab/10_2_kernel_impl_explorations.ipynb index ecd11e624..2eabcf2c7 100644 --- a/data/notebooks/Lab/10_2_kernel_impl_explorations.ipynb +++ b/data/notebooks/Lab/10_2_kernel_impl_explorations.ipynb @@ -6,10 +6,10 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", + "import cv2\n", "import matplotlib.pyplot as plt\n", + "import numpy as np\n", "import rasterio as rio\n", - "import cv2\n", "import scipy" ] }, @@ -40,7 +40,7 @@ } ], "source": [ - "plt.imshow(cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(70,70)))" + "plt.imshow(cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(70, 70)))" ] }, { @@ -54,7 +54,6 @@ " meta = src.meta.copy()\n", " transform = src.transform\n", " arr = src.read(1)\n", - " orig_crs = src.crs\n", " # km 
per degree near the ecuator. At high lats this will bite us in the ass\n", " # The issue here is that the kernel size should vary depending on the raster latitude and proj\n", " # for now we will asume that the error for high lat rasters is ok but we should explore a fix.\n", @@ -62,9 +61,11 @@ "\n", " y_size_km = -transform[4] * 111 # 1 deg ~~ 111 km at ecuator\n", " radius_in_pixels = int(radius // y_size_km)\n", - " kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(radius_in_pixels, radius_in_pixels))\n", - " \n", - " # apply the buffer using opencv filter function. \n", + " kernel = cv2.getStructuringElement(\n", + " cv2.MORPH_ELLIPSE, ksize=(radius_in_pixels, radius_in_pixels)\n", + " )\n", + "\n", + " # apply the buffer using opencv filter function.\n", " # It calculates the cross-croletation instead of the convolution but\n", " # since we are using a simetric kernel it does not matter.\n", " # Also it is 100x faster than the scipy convolve ¯\\_(ツ)_/¯\n", @@ -117,8 +118,11 @@ ], "source": [ "%%timeit\n", - "main(\"../../../../Hansen_GFC-2020-v1.8_lossyear_20S_060W_10km.tif\",\n", - " \"../../../../Hansen_GFC-2020-v1.8_lossyear_20S_060W_10km_buff.tif\", 50)" + "main(\n", + " \"../../../../Hansen_GFC-2020-v1.8_lossyear_20S_060W_10km.tif\",\n", + " \"../../../../Hansen_GFC-2020-v1.8_lossyear_20S_060W_10km_buff.tif\",\n", + " 50,\n", + ")" ] }, { @@ -142,11 +146,11 @@ "radius_in_pixels = int(50 // y_size_km)\n", "kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(radius_in_pixels, radius_in_pixels))\n", "\n", - "# apply the buffer using opencv filter function. \n", + "# apply the buffer using opencv filter function.\n", "# It calculates the cross-croletation instead of the convolution but\n", "# since we are using a simetric kernel it does not matter.\n", "# Also it is 100x faster than the scipy convolve ¯\\_(ツ)_/¯\n", - "res_buff = cv2.filter2D(arr, ddepth=-1, kernel=kernel) / np.sum(kernel)\n" + "res_buff = cv2.filter2D(arr, ddepth=-1, kernel=kernel) / np.sum(kernel)" ] }, { @@ -195,9 +199,11 @@ "metadata": {}, "outputs": [], "source": [ - "k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(100,100))\n", + "k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(100, 100))\n", "\n", - "res = focal_mean(\"../../../../SpeciesRichness_IDN_2021-01-01-2022-01-01.tif\", k, \"../../../../ktest_200.tig\")" + "res = focal_mean(\n", + " \"../../../../SpeciesRichness_IDN_2021-01-01-2022-01-01.tif\", k, \"../../../../ktest_200.tig\"\n", + ")" ] }, { @@ -207,7 +213,7 @@ "outputs": [], "source": [ "src = rio.open(\"../../../../SpeciesRichness_IDN_2021-01-01-2022-01-01.tif\")\n", - "data = src.read(1)\n" + "data = src.read(1)" ] }, { @@ -303,11 +309,11 @@ "metadata": {}, "outputs": [], "source": [ - "with rio.open(\"../../../../ktest_200_cv.tif\",\"w\", **profile) as dst:\n", - " dst.write(res_cv[np.newaxis,:])\n", + "with rio.open(\"../../../../ktest_200_cv.tif\", \"w\", **profile) as dst:\n", + " dst.write(res_cv[np.newaxis, :])\n", "\n", "with rio.open(\"../../../../ktest_200.tif\", \"w\", **profile) as dst:\n", - " dst.write(res[np.newaxis,:])" + " dst.write(res[np.newaxis, :])" ] }, { @@ -346,7 +352,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_size_km = -gt[4]*111" + "y_size_km = -gt[4] * 111" ] }, { @@ -398,15 +404,6 @@ "crs.is_geographic" ] }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [], - "source": [ - "from rasterio.warp import calculate_default_transform, reproject, Resampling\n" - ] - }, { "cell_type": "code", 
"execution_count": null, diff --git a/data/notebooks/Lab/10_Met_v0.1_results.ipynb b/data/notebooks/Lab/10_Met_v0.1_results.ipynb index 52f02bbe9..91e6b2b5e 100644 --- a/data/notebooks/Lab/10_Met_v0.1_results.ipynb +++ b/data/notebooks/Lab/10_Met_v0.1_results.ipynb @@ -36,17 +36,16 @@ "source": [ "# import libraries\n", "import geopandas as gpd\n", - "from rasterstats import zonal_stats\n", "import rasterio as rio\n", + "from rasterstats import zonal_stats\n", "\n", "!pip install h3ronpy h3pandas --q\n", - "from h3ronpy import raster\n", + "import os\n", + "\n", "import h3\n", - "import h3pandas\n", "import pandas as pd\n", - "from shapely.geometry import Polygon\n", - "\n", - "import os" + "from h3ronpy import raster\n", + "from shapely.geometry import Polygon" ] }, { @@ -57,10 +56,10 @@ "outputs": [], "source": [ "import numpy as np\n", - "from PIL import Image\n", "import scipy.ndimage\n", "import scipy.signal\n", - "from osgeo import gdal" + "from osgeo import gdal\n", + "from PIL import Image" ] }, { @@ -70,7 +69,7 @@ "metadata": {}, "outputs": [], "source": [ - "def buffer_stats(raster_path, vector_path, buffer=50000, stat_='sum', all_touched = True):\n", + "def buffer_stats(raster_path, vector_path, buffer=50000, stat_=\"sum\", all_touched=True):\n", " \"\"\"\n", " inputs:\n", " -------------\n", @@ -78,73 +77,71 @@ " vector_path: path to point file in EPSG:4326\n", " buffer: distance in metres for coputing the buffer\n", " stats: stadistics to compute\n", - " \n", + "\n", " output\n", " -------\n", " array with statistics\"\"\"\n", - " \n", - " #open vector file\n", + "\n", + " # open vector file\n", " gdf = gpd.read_file(vector_path)\n", - " #check projection\n", - " #if gdf.crs != True:\n", + " # check projection\n", + " # if gdf.crs != True:\n", " # print(gdf.crs)\n", " # #project\n", " # print('Dataset missing projection. Please assign one!')\n", - " if gdf.crs and gdf.crs == 'EPSG:4326':\n", - " #reproject\n", - " gdf_3857 = gdf.to_crs('EPSG:3857')\n", + " if gdf.crs and gdf.crs == \"EPSG:4326\":\n", + " # reproject\n", + " gdf_3857 = gdf.to_crs(\"EPSG:3857\")\n", " ## TODO:add other validations\n", - " \n", "\n", - " #get buffer\n", + " # get buffer\n", " gdf_3857_buffer = gdf_3857.buffer(buffer)\n", - " #reproject back to epsg4326\n", - " gdf_4326_buffer = gdf_3857_buffer.to_crs('EPSG:4326')\n", - " #get statistics\n", + " # reproject back to epsg4326\n", + " gdf_4326_buffer = gdf_3857_buffer.to_crs(\"EPSG:4326\")\n", + " # get statistics\n", " vizz_stats = []\n", " for geom in gdf_4326_buffer:\n", - " stats = zonal_stats(geom,\n", - " raster_path,\n", - " stats=stat_,\n", - " all_touched = all_touched\n", - " )\n", - " stat_sum = stats[0]['sum']\n", + " stats = zonal_stats(geom, raster_path, stats=stat_, all_touched=all_touched)\n", + " stat_sum = stats[0][\"sum\"]\n", " vizz_stats.append(stat_sum)\n", - " #add stats in dataframe\n", - " gdf['estimated']=vizz_stats\n", + " # add stats in dataframe\n", + " gdf[\"estimated\"] = vizz_stats\n", " return gdf\n", "\n", - "def raster_to_h3(raster_path, resolution=6, field='value', plot=False):\n", + "\n", + "def raster_to_h3(raster_path, resolution=6, field=\"value\", plot=False):\n", " \"\"\"convert raster to h3 with a given h3 resolution. 
Returns a gdf with the h3 geometries.\"\"\"\n", - " \n", - " with rio.open(raster_path) as src:\n", - " gdf = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=resolution, nodata_value=src.profile['nodata'], compacted=False)\n", "\n", - " gdf = gdf.rename(columns={'value':field})\n", + " with rio.open(raster_path) as src:\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1),\n", + " src.transform,\n", + " h3_resolution=resolution,\n", + " nodata_value=src.profile[\"nodata\"],\n", + " compacted=False,\n", + " )\n", + "\n", + " gdf = gdf.rename(columns={\"value\": field})\n", " if plot:\n", " gdf.plot(field)\n", - " gdf['h3index'] = gdf['h3index'].apply(hex)\n", - " \n", + " gdf[\"h3index\"] = gdf[\"h3index\"].apply(hex)\n", + "\n", " return gdf\n", - " \n", - " \n", - "def focal_mean(raster_path, \n", - " kernel_path, \n", - " output_path):\n", - " #open deforestation array\n", + "\n", + "\n", + "def focal_mean(raster_path, kernel_path, output_path):\n", + " # open deforestation array\n", " ds_def = gdal.Open(raster_path)\n", " def_array = np.array(ds_def.GetRasterBand(1).ReadAsArray())\n", - " \n", - " #open kernel path\n", + "\n", + " # open kernel path\n", " ds_kernnel = gdal.Open(kernel_path)\n", " kernnel_array = np.array(ds_kernnel.GetRasterBand(1).ReadAsArray())\n", - " \n", - " #perform the focal mean with convolute\n", + "\n", + " # perform the focal mean with convolute\n", " result_fm = scipy.ndimage.convolve(def_array, weights=kernnel_array) / kernnel_array.size\n", " im = Image.fromarray(result_fm)\n", - " im.save(output_path)\n", - " \n", - " " + " im.save(output_path)" ] }, { @@ -212,23 +209,22 @@ "outputs": [], "source": [ "# get deforestation in buffer zones\n", - "\n", - "vector_path = '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp'\n", + "vector_path = \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\"\n", "resolution = 6\n", "\n", "gdf_vector = gpd.read_file(vector_path)\n", - "clean_gdf = gdf_vector[['gfw_fid','deforestat','geometry']]\n", + "clean_gdf = gdf_vector[[\"gfw_fid\", \"deforestat\", \"geometry\"]]\n", "\n", "_sum_calculated = []\n", "for i, row in clean_gdf.iterrows():\n", - " filtered_gdf = clean_gdf[i:i+1]\n", - " #convert to h3\n", + " filtered_gdf = clean_gdf[i : i + 1]\n", + " # convert to h3\n", " h3_gdf = filtered_gdf.h3.polyfill_resample(resolution)\n", - " h3index_list = [f'0x{h3index}' for h3index in h3_gdf.index]\n", - " _sum = merge_gdf[merge_gdf['h3index'].isin(h3index_list)]['deforestation_km2'].sum()*100\n", + " h3index_list = [f\"0x{h3index}\" for h3index in h3_gdf.index]\n", + " _sum = merge_gdf[merge_gdf[\"h3index\"].isin(h3index_list)][\"deforestation_km2\"].sum() * 100\n", " _sum_calculated.append(_sum)\n", - " \n", - "#_sum_calculated" + "\n", + "# _sum_calculated" ] }, { @@ -238,11 +234,14 @@ "metadata": {}, "outputs": [], "source": [ - "#zonal statistics raster\n", - "stats_ = buffer_stats('../../datasets/raw/methodology_results/Deforestation_IDN_2021-01-01-2022-01-01.tif',\n", - " '../../datasets/processed/palm_oil_mills/satelligence_mills_4326_point.shp',\n", - " buffer=50000,\n", - " stat_='sum', all_touched = False)" + "# zonal statistics raster\n", + "stats_ = buffer_stats(\n", + " \"../../datasets/raw/methodology_results/Deforestation_IDN_2021-01-01-2022-01-01.tif\",\n", + " \"../../datasets/processed/palm_oil_mills/satelligence_mills_4326_point.shp\",\n", + " buffer=50000,\n", + " 
stat_=\"sum\",\n", + " all_touched=False,\n", + ")" ] }, { @@ -252,7 +251,7 @@ "metadata": {}, "outputs": [], "source": [ - "def_raster = list(stats_['estimated']*6.69019042035408517*6.69019042035408517* 0.0001)" + "def_raster = list(stats_[\"estimated\"] * 6.69019042035408517 * 6.69019042035408517 * 0.0001)" ] }, { @@ -314,20 +313,23 @@ "# ultiply def area in hectares\n", "# then filter all locations where there is production\n", "\n", - "def_density = '../../datasets/raw/methodology_results/Deforestation_IDN_2021-01-01-2022-01-01_density.tif'\n", - "def_area_ha = '../../datasets/raw/methodology_results/update/Deforestation_IDN_2021-01-01-2022-01-01_area_ha.tif'\n", - "kernel_50km = '../../datasets/raw/methodology_results/test_location_buffer_raster.tif'\n", + "def_density = (\n", + " \"../../datasets/raw/methodology_results/Deforestation_IDN_2021-01-01-2022-01-01_density.tif\"\n", + ")\n", + "def_area_ha = \"../../datasets/raw/methodology_results/update/Deforestation_IDN_2021-01-01-2022-01-01_area_ha.tif\"\n", + "kernel_50km = \"../../datasets/raw/methodology_results/test_location_buffer_raster.tif\"\n", "\n", - "# pixel area in hectares = 8633.766614450342 \n", - "#calculate deforestation area\n", + "# pixel area in hectares = 8633.766614450342\n", + "# calculate deforestation area\n", "!gdal_calc.py --calc \"A*8633.766614450342\" --format GTiff --type Float32 --NoDataValue 0.0 -A $def_density --A_band 1 --outfile $def_area_ha;\n", "\n", "\n", "## generate kernel\n", - "focal_mean(raster_path = def_area_ha, \n", - " kernel_path = kernel_50km, \n", - " output_path = '../../datasets/raw/methodology_results/update/deforestation_50km_kernel_v2.tif')\n", - "\n" + "focal_mean(\n", + " raster_path=def_area_ha,\n", + " kernel_path=kernel_50km,\n", + " output_path=\"../../datasets/raw/methodology_results/update/deforestation_50km_kernel_v2.tif\",\n", + ")" ] }, { @@ -387,7 +389,7 @@ "metadata": {}, "outputs": [], "source": [ - "#set projection\n", + "# set projection\n", "\n", "## change extent and set projection\n", "\n", @@ -411,9 +413,11 @@ "source": [ "empty_array = np.zeros((2160, 4320))\n", "im = Image.fromarray(empty_array)\n", - "im.save('../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif')\n", + "im.save(\n", + " \"../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif\"\n", + ")\n", "# geolocate with new extent\n", - "!gdal_edit.py -a_srs EPSG:4326 -a_ulurll -180.0000000 90.0000000 179.9985600 90.0000000 -180.0000000 -89.9992800 -a_nodata -1 '../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif'\n" + "!gdal_edit.py -a_srs EPSG:4326 -a_ulurll -180.0000000 90.0000000 179.9985600 90.0000000 -180.0000000 -89.9992800 -a_nodata -1 '../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif'" ] }, { @@ -433,13 +437,22 @@ } ], "source": [ - "all_ha_commodities = [file for file in os.listdir('../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff') if file.endswith('_A.tif')]\n", - "\n", - "for i in range(0,len(all_ha_commodities)):\n", - " file = '../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/'+ all_ha_commodities[i]\n", - " #print(f'Summing {all_ha_commodities[i]}...')\n", + "all_ha_commodities = [\n", + " file\n", + " for file in os.listdir(\n", + " 
\"../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff\"\n", + " )\n", + " if file.endswith(\"_A.tif\")\n", + "]\n", + "\n", + "for i in range(0, len(all_ha_commodities)):\n", + " file = (\n", + " \"../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/\"\n", + " + all_ha_commodities[i]\n", + " )\n", + " # print(f'Summing {all_ha_commodities[i]}...')\n", " !gdal_calc.py --calc \"A+B\" --NoDataValue -1 --format GTiff --type Float32 --NoDataValue -1 -A ../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif --A_band 1 -B $file --outfile ../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif --q;\n", - "print('Done!')" + "print(\"Done!\")" ] }, { @@ -458,7 +471,7 @@ } ], "source": [ - "#clip data to area of interest\n", + "# clip data to area of interest\n", "!gdal_translate -projwin 94.99998 6.10002 98.333313333 2.10002 -of GTiff ../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif ../../datasets/raw/methodology_results/harvest_area_mapspam/harvest_area_sum_ha_clip.tif;" ] }, @@ -681,46 +694,78 @@ } ], "source": [ - "#translate density raster to h3\n", - "rp_density = '../../datasets/raw/methodology_results/Deforestation_IDN_2021-01-01-2022-01-01_density.tif'\n", - "rp_area = '../../datasets/processed/Satelligence_data/area_ratio/8_Areakm_clip_ind.tif'\n", - "rp_oil_prod_t = '../../datasets/raw/methodology_results/spam_palm_oil_prod_clip.tif'\n", - "rp_oil_ha = '../../datasets/raw/methodology_results/spam_palm_oil_ha_clip.tif'\n", - "rp_all_comm_ha = '../../datasets/raw/methodology_results/harvest_area_mapspam/harvest_area_sum_ha_clip.tif'\n", - "kernel_Def = '../../datasets/raw/methodology_results/update/deforestation_50km_kernel_v2.tif'\n", + "# translate density raster to h3\n", + "rp_density = (\n", + " \"../../datasets/raw/methodology_results/Deforestation_IDN_2021-01-01-2022-01-01_density.tif\"\n", + ")\n", + "rp_area = \"../../datasets/processed/Satelligence_data/area_ratio/8_Areakm_clip_ind.tif\"\n", + "rp_oil_prod_t = \"../../datasets/raw/methodology_results/spam_palm_oil_prod_clip.tif\"\n", + "rp_oil_ha = \"../../datasets/raw/methodology_results/spam_palm_oil_ha_clip.tif\"\n", + "rp_all_comm_ha = (\n", + " \"../../datasets/raw/methodology_results/harvest_area_mapspam/harvest_area_sum_ha_clip.tif\"\n", + ")\n", + "kernel_Def = \"../../datasets/raw/methodology_results/update/deforestation_50km_kernel_v2.tif\"\n", "\n", "\n", "resolution = 6\n", "\n", "\n", - "\n", - "\n", - "gdf_def_density = raster_to_h3(rp_density, resolution=resolution, field ='def_density', plot=True)\n", + "gdf_def_density = raster_to_h3(rp_density, resolution=resolution, field=\"def_density\", plot=True)\n", "\n", "# translate pixel area to h3 to compute pixel area/h3 area ratio\n", - "#translate density raster to h3\n", - "gdf_def_area = raster_to_h3(rp_area, resolution=resolution, field='pixel_area_km2')\n", - "gdf_po_prod = raster_to_h3(rp_oil_prod_t, resolution=resolution, field='prod_t', plot=True)\n", - "gdf_po_ha = raster_to_h3(rp_oil_ha, resolution=resolution, field='harvst_ha', plot=True)\n", - "gdf_allcommodities_ha = raster_to_h3(rp_all_comm_ha, resolution=resolution, field='harvst_all_ha', plot=True)\n", - "gdf_kernel_Def = raster_to_h3(kernel_Def, resolution=resolution, field='kernel_def_ha', plot=True)\n", + "# translate density raster to h3\n", + 
"gdf_def_area = raster_to_h3(rp_area, resolution=resolution, field=\"pixel_area_km2\")\n", + "gdf_po_prod = raster_to_h3(rp_oil_prod_t, resolution=resolution, field=\"prod_t\", plot=True)\n", + "gdf_po_ha = raster_to_h3(rp_oil_ha, resolution=resolution, field=\"harvst_ha\", plot=True)\n", + "gdf_allcommodities_ha = raster_to_h3(\n", + " rp_all_comm_ha, resolution=resolution, field=\"harvst_all_ha\", plot=True\n", + ")\n", + "gdf_kernel_Def = raster_to_h3(kernel_Def, resolution=resolution, field=\"kernel_def_ha\", plot=True)\n", "\n", "\n", "## merge datasets\n", "\n", - "gdf_merge = gdf_po_prod.merge(gdf_po_ha, on='h3index', how='outer').merge(gdf_def_area, on='h3index', how='outer')[['h3index', 'pixel_area_km2', 'prod_t', 'harvst_ha', 'geometry']].merge(gdf_def_density, on='h3index', how='outer').merge(gdf_allcommodities_ha, on='h3index', how='outer')\n", - "\n", - "\n", - "#clean merged dataset - get just one geometry\n", - "\n", - "gdf_merge = gdf_merge[['h3index','def_density', 'pixel_area_km2', 'prod_t', 'harvst_ha','harvst_all_ha','geometry_x']].rename(columns={'geometry_x':'geometry'})\n", - "gdf_merge = gdf_merge.merge(gdf_kernel_Def, on='h3index', how='outer')[['h3index','def_density', 'pixel_area_km2', 'prod_t', 'harvst_ha','harvst_all_ha','kernel_def_ha','geometry_x']].rename(columns={'geometry_x':'geometry'})\n", - "\n", - "#calculate deforestation area \n", - "gdf_merge['def_area_ha'] = gdf_merge['pixel_area_km2']*100*gdf_merge['def_density']\n", - "gdf_merge['h3index'] = [h3index.split('x')[1] for h3index in gdf_merge['h3index']]\n", - "gdf_merge['h3Area_km2'] = [h3.cell_area(h3index) for h3index in list(gdf_merge['h3index'])]\n", - "gdf_merge['area_ratio'] = gdf_merge['h3Area_km2']/gdf_merge['pixel_area_km2']\n", + "gdf_merge = (\n", + " gdf_po_prod.merge(gdf_po_ha, on=\"h3index\", how=\"outer\")\n", + " .merge(gdf_def_area, on=\"h3index\", how=\"outer\")[\n", + " [\"h3index\", \"pixel_area_km2\", \"prod_t\", \"harvst_ha\", \"geometry\"]\n", + " ]\n", + " .merge(gdf_def_density, on=\"h3index\", how=\"outer\")\n", + " .merge(gdf_allcommodities_ha, on=\"h3index\", how=\"outer\")\n", + ")\n", + "\n", + "\n", + "# clean merged dataset - get just one geometry\n", + "\n", + "gdf_merge = gdf_merge[\n", + " [\n", + " \"h3index\",\n", + " \"def_density\",\n", + " \"pixel_area_km2\",\n", + " \"prod_t\",\n", + " \"harvst_ha\",\n", + " \"harvst_all_ha\",\n", + " \"geometry_x\",\n", + " ]\n", + "].rename(columns={\"geometry_x\": \"geometry\"})\n", + "gdf_merge = gdf_merge.merge(gdf_kernel_Def, on=\"h3index\", how=\"outer\")[\n", + " [\n", + " \"h3index\",\n", + " \"def_density\",\n", + " \"pixel_area_km2\",\n", + " \"prod_t\",\n", + " \"harvst_ha\",\n", + " \"harvst_all_ha\",\n", + " \"kernel_def_ha\",\n", + " \"geometry_x\",\n", + " ]\n", + "].rename(columns={\"geometry_x\": \"geometry\"})\n", + "\n", + "# calculate deforestation area\n", + "gdf_merge[\"def_area_ha\"] = gdf_merge[\"pixel_area_km2\"] * 100 * gdf_merge[\"def_density\"]\n", + "gdf_merge[\"h3index\"] = [h3index.split(\"x\")[1] for h3index in gdf_merge[\"h3index\"]]\n", + "gdf_merge[\"h3Area_km2\"] = [h3.cell_area(h3index) for h3index in list(gdf_merge[\"h3index\"])]\n", + "gdf_merge[\"area_ratio\"] = gdf_merge[\"h3Area_km2\"] / gdf_merge[\"pixel_area_km2\"]\n", "\n", "gdf_merge.head()" ] @@ -741,8 +786,8 @@ } ], "source": [ - "gdf_merge = gdf_merge.set_geometry('geometry')\n", - "gdf_merge.to_file('../../datasets/raw/methodology_results/update/gdf_kernel_Deforestation_v2.shp')" + "gdf_merge = 
gdf_merge.set_geometry(\"geometry\")\n", + "gdf_merge.to_file(\"../../datasets/raw/methodology_results/update/gdf_kernel_Deforestation_v2.shp\")" ] }, { @@ -819,16 +864,18 @@ } ], "source": [ - "point_location = gpd.read_file('../../datasets/raw/methodology_results/test_location_point.geojson')\n", + "point_location = gpd.read_file(\"../../datasets/raw/methodology_results/test_location_point.geojson\")\n", "point_location = point_location.h3.geo_to_h3(6).reset_index(drop=False)\n", "\n", - "point_location = point_location[['h3_06']]\n", + "point_location = point_location[[\"h3_06\"]]\n", "\n", - "point_location['geometry'] = Polygon(h3.h3_to_geo_boundary(point_location['h3_06'][0], geo_json=True))\n", - "point_location = point_location.set_geometry('geometry')\n", - "#point_location.to_file('../../datasets/raw/methodology_results/test_location_point_h3_res6_v3.shp')\n", + "point_location[\"geometry\"] = Polygon(\n", + " h3.h3_to_geo_boundary(point_location[\"h3_06\"][0], geo_json=True)\n", + ")\n", + "point_location = point_location.set_geometry(\"geometry\")\n", + "# point_location.to_file('../../datasets/raw/methodology_results/test_location_point_h3_res6_v3.shp')\n", "\n", - "point_location\n" + "point_location" ] }, { @@ -910,12 +957,12 @@ } ], "source": [ - "#obtain deforestation that takes places in that hexagon\n", + "# obtain deforestation that takes places in that hexagon\n", "\n", - "h3index_list = list(point_location['h3_06'])\n", + "h3index_list = list(point_location[\"h3_06\"])\n", "\n", - "def_point_loc = gdf_merge[gdf_merge['h3index'].isin(h3index_list)]\n", - "def_point_loc\n" + "def_point_loc = gdf_merge[gdf_merge[\"h3index\"].isin(h3index_list)]\n", + "def_point_loc" ] }, { @@ -936,19 +983,23 @@ } ], "source": [ - "#asumming volume equal to 1T\n", - "land_impact_point = 1000*def_point_loc['harvst_ha'].sum()/def_point_loc['prod_t'].sum()\n", - "print(f'land impact: {land_impact_point} ha')\n", + "# asumming volume equal to 1T\n", + "land_impact_point = 1000 * def_point_loc[\"harvst_ha\"].sum() / def_point_loc[\"prod_t\"].sum()\n", + "print(f\"land impact: {land_impact_point} ha\")\n", "\n", - "def_if = sum(def_point_loc['kernel_def_ha'] * def_point_loc['prod_t'])/ def_point_loc['prod_t'].sum()\n", - "print(f'Dif: {def_if}')\n", + "def_if = (\n", + " sum(def_point_loc[\"kernel_def_ha\"] * def_point_loc[\"prod_t\"]) / def_point_loc[\"prod_t\"].sum()\n", + ")\n", + "print(f\"Dif: {def_if}\")\n", "\n", - "#Weighted mean total cropland area per pixel\n", - "def_total_cropland_area_per_pixel = (def_point_loc['harvst_all_ha'] * def_point_loc['prod_t']).dropna().sum() /def_point_loc['prod_t'].sum()\n", - "print(f'Mean cropland area: {def_total_cropland_area_per_pixel}')\n", + "# Weighted mean total cropland area per pixel\n", + "def_total_cropland_area_per_pixel = (\n", + " def_point_loc[\"harvst_all_ha\"] * def_point_loc[\"prod_t\"]\n", + ").dropna().sum() / def_point_loc[\"prod_t\"].sum()\n", + "print(f\"Mean cropland area: {def_total_cropland_area_per_pixel}\")\n", "\n", "def_impact_2 = (def_if * land_impact_point) / def_total_cropland_area_per_pixel\n", - "print(f'Revised forest loss risk:{def_impact_2} ha')" + "print(f\"Revised forest loss risk:{def_impact_2} ha\")" ] }, { @@ -1045,17 +1096,20 @@ } ], "source": [ - "agg_point = gpd.read_file('../../datasets/raw/methodology_results/test_location_point.geojson')\n", - "agg_point = agg_point.to_crs('EPSG:3857')\n", + "agg_point = gpd.read_file(\"../../datasets/raw/methodology_results/test_location_point.geojson\")\n", + 
"agg_point = agg_point.to_crs(\"EPSG:3857\")\n", "agg_point = agg_point.buffer(50000)\n", - "agg_point = agg_point.to_crs('EPSG:4326')\n", + "agg_point = agg_point.to_crs(\"EPSG:4326\")\n", "\n", - "h3_agg_point = h3.polyfill(agg_point.geometry[0].__geo_interface__, 6, geo_json_conformant = True)\n", + "h3_agg_point = h3.polyfill(agg_point.geometry[0].__geo_interface__, 6, geo_json_conformant=True)\n", "\n", "agg_point_gdf = gpd.GeoDataFrame(h3_agg_point)\n", - "agg_point_gdf = agg_point_gdf.rename(columns={0:'h3index'})\n", - "agg_point_gdf['geometry'] = [Polygon(h3.h3_to_geo_boundary(h3index, geo_json=True)) for h3index in list(agg_point_gdf['h3index'])]\n", - "#agg_point_gdf.to_file('../../datasets/raw/methodology_results/test_agg_point_h3_res6_v2.shp')\n", + "agg_point_gdf = agg_point_gdf.rename(columns={0: \"h3index\"})\n", + "agg_point_gdf[\"geometry\"] = [\n", + " Polygon(h3.h3_to_geo_boundary(h3index, geo_json=True))\n", + " for h3index in list(agg_point_gdf[\"h3index\"])\n", + "]\n", + "# agg_point_gdf.to_file('../../datasets/raw/methodology_results/test_agg_point_h3_res6_v2.shp')\n", "agg_point_gdf.head()" ] }, @@ -1210,10 +1264,10 @@ } ], "source": [ - "#obtain deforestation that takes places in that hexagon\n", - "h3index_list = list(agg_point_gdf['h3index'])\n", + "# obtain deforestation that takes places in that hexagon\n", + "h3index_list = list(agg_point_gdf[\"h3index\"])\n", "\n", - "def_agg_loc = gdf_merge[gdf_merge['h3index'].isin(h3index_list)]\n", + "def_agg_loc = gdf_merge[gdf_merge[\"h3index\"].isin(h3index_list)]\n", "def_agg_loc.head()" ] }, @@ -1235,19 +1289,24 @@ } ], "source": [ - "#asumming volume equal to 1T\n", - "land_impact_agg_point = 1000*def_agg_loc['harvst_ha'].sum()/def_agg_loc['prod_t'].sum()\n", - "print(f'land impact: {land_impact_agg_point} ha')\n", - "\n", - "def_if = sum((def_agg_loc['kernel_def_ha'] * def_agg_loc['prod_t']).dropna()) / def_agg_loc['prod_t'].sum()\n", - "print(f'Dif: {def_if}')\n", - "\n", - "#Weighted mean total cropland area per pixel\n", - "def_total_cropland_area_per_pixel = (def_agg_loc['harvst_all_ha'] * def_agg_loc['prod_t']).dropna().sum() /def_agg_loc['prod_t'].sum()\n", - "print(f'Mean cropland area: {def_total_cropland_area_per_pixel}')\n", + "# asumming volume equal to 1T\n", + "land_impact_agg_point = 1000 * def_agg_loc[\"harvst_ha\"].sum() / def_agg_loc[\"prod_t\"].sum()\n", + "print(f\"land impact: {land_impact_agg_point} ha\")\n", + "\n", + "def_if = (\n", + " sum((def_agg_loc[\"kernel_def_ha\"] * def_agg_loc[\"prod_t\"]).dropna())\n", + " / def_agg_loc[\"prod_t\"].sum()\n", + ")\n", + "print(f\"Dif: {def_if}\")\n", + "\n", + "# Weighted mean total cropland area per pixel\n", + "def_total_cropland_area_per_pixel = (\n", + " def_agg_loc[\"harvst_all_ha\"] * def_agg_loc[\"prod_t\"]\n", + ").dropna().sum() / def_agg_loc[\"prod_t\"].sum()\n", + "print(f\"Mean cropland area: {def_total_cropland_area_per_pixel}\")\n", "\n", "def_impact_agg_2 = (def_if * land_impact_agg_point) / def_total_cropland_area_per_pixel\n", - "print(f'Revised forest loss risk:{def_impact_agg_2} ha')\n" + "print(f\"Revised forest loss risk:{def_impact_agg_2} ha\")" ] }, { @@ -1273,19 +1332,23 @@ "evalue": "name 'def_impact_agg' is not defined", "output_type": "error", "traceback": [ - "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[0;31mNameError\u001B[0m Traceback (most recent call last)", - "\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[1;32m 1\u001B[0m 
\u001B[0;31m## map - land impact aggregation point:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 2\u001B[0m \u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'land_impact_ha'\u001B[0m\u001B[0;34m]\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m(\u001B[0m\u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'prod_t'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m*\u001B[0m\u001B[0mland_impact_agg_point\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;34m/\u001B[0m \u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'prod_t'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msum\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 3\u001B[0;31m \u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'def_impact_ha'\u001B[0m\u001B[0;34m]\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m(\u001B[0m\u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'prod_t'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m*\u001B[0m\u001B[0mdef_impact_agg\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;34m/\u001B[0m \u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'prod_t'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msum\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 4\u001B[0m \u001B[0mdef_agg_loc\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mset_geometry\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'geometry'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 5\u001B[0m \u001B[0;31m#def_agg_loc.to_file('../../datasets/raw/methodology_results/update/agg_point_h3_res6_impact_v1_kernel.shp')\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", - "\u001B[0;31mNameError\u001B[0m: name 'def_impact_agg' is not defined" + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m## map - land impact aggregation point:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'land_impact_ha'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'prod_t'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mland_impact_agg_point\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'prod_t'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'def_impact_ha'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'prod_t'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mdef_impact_agg\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m 
\u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'prod_t'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mdef_agg_loc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_geometry\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'geometry'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m#def_agg_loc.to_file('../../datasets/raw/methodology_results/update/agg_point_h3_res6_impact_v1_kernel.shp')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'def_impact_agg' is not defined" ] } ], "source": [ "## map - land impact aggregation point:\n", - "def_agg_loc['land_impact_ha'] = (def_agg_loc['prod_t']*land_impact_agg_point) / def_agg_loc['prod_t'].sum()\n", - "def_agg_loc['def_impact_ha'] = (def_agg_loc['prod_t']*def_impact_agg) / def_agg_loc['prod_t'].sum()\n", - "def_agg_loc = def_agg_loc.set_geometry('geometry')\n", - "#def_agg_loc.to_file('../../datasets/raw/methodology_results/update/agg_point_h3_res6_impact_v1_kernel.shp')\n", + "def_agg_loc[\"land_impact_ha\"] = (def_agg_loc[\"prod_t\"] * land_impact_agg_point) / def_agg_loc[\n", + " \"prod_t\"\n", + "].sum()\n", + "def_agg_loc[\"def_impact_ha\"] = (def_agg_loc[\"prod_t\"] * def_impact_agg) / def_agg_loc[\n", + " \"prod_t\"\n", + "].sum()\n", + "def_agg_loc = def_agg_loc.set_geometry(\"geometry\")\n", + "# def_agg_loc.to_file('../../datasets/raw/methodology_results/update/agg_point_h3_res6_impact_v1_kernel.shp')\n", "\n", "def_agg_loc.head()" ] @@ -1378,21 +1441,25 @@ } ], "source": [ - "adm_loc = gpd.read_file('../../datasets/raw/methodology_results/aceh_loc.geojson')\n", + "adm_loc = gpd.read_file(\"../../datasets/raw/methodology_results/aceh_loc.geojson\")\n", "adm_loc = adm_loc.explode(index_parts=True)\n", - "h3_multipol = [h3.polyfill(geom.__geo_interface__, 6, geo_json_conformant = True) for geom in list(adm_loc['geometry'])]\n", + "h3_multipol = [\n", + " h3.polyfill(geom.__geo_interface__, 6, geo_json_conformant=True)\n", + " for geom in list(adm_loc[\"geometry\"])\n", + "]\n", "\n", - "for i in range(0,len(h3_multipol)):\n", + "for i in range(0, len(h3_multipol)):\n", " if i == 0:\n", " df_mult = pd.DataFrame(h3_multipol[i])\n", " else:\n", - " \n", " df_ = pd.DataFrame(h3_multipol[i])\n", " df_mult = pd.concat([df_mult, df_])\n", - "df_mult = df_mult.rename(columns={0:'h3index'})\n", - "df_mult['geometry'] = [Polygon(h3.h3_to_geo_boundary(h3index, geo_json=True)) for h3index in list(df_mult['h3index'])]\n", - "df_mult = df_mult.set_geometry('geometry')\n", - "#df_mult.to_file('../../datasets/raw/methodology_results/test_aceh_h3_res6.shp')\n", + "df_mult = df_mult.rename(columns={0: \"h3index\"})\n", + "df_mult[\"geometry\"] = [\n", + " Polygon(h3.h3_to_geo_boundary(h3index, geo_json=True)) for h3index in list(df_mult[\"h3index\"])\n", + "]\n", + "df_mult = df_mult.set_geometry(\"geometry\")\n", + "# df_mult.to_file('../../datasets/raw/methodology_results/test_aceh_h3_res6.shp')\n", "df_mult.head()" ] }, @@ -1547,10 +1614,10 @@ } ], "source": [ - "#obtain deforestation that takes places in that hexagon\n", - "h3index_list = list(df_mult['h3index'])\n", + "# obtain deforestation that takes places in that hexagon\n", + "h3index_list = list(df_mult[\"h3index\"])\n", "\n", - 
"def_aceh = gdf_merge[gdf_merge['h3index'].isin(h3index_list)]\n", + "def_aceh = gdf_merge[gdf_merge[\"h3index\"].isin(h3index_list)]\n", "def_aceh.head()" ] }, @@ -1571,19 +1638,21 @@ } ], "source": [ - "#asumming volume equal to 1T\n", - "land_impact_aceh = 1000*def_aceh['harvst_ha'].sum()/def_aceh['prod_t'].sum()\n", - "print(f'land impact: {land_impact_aceh} ha')\n", + "# asumming volume equal to 1T\n", + "land_impact_aceh = 1000 * def_aceh[\"harvst_ha\"].sum() / def_aceh[\"prod_t\"].sum()\n", + "print(f\"land impact: {land_impact_aceh} ha\")\n", "\n", - "def_if = (def_aceh['kernel_def_ha'] * def_aceh['prod_t']).dropna().sum() /def_aceh['prod_t'].sum()\n", - "print(f'Dif: {def_if}')\n", + "def_if = (def_aceh[\"kernel_def_ha\"] * def_aceh[\"prod_t\"]).dropna().sum() / def_aceh[\"prod_t\"].sum()\n", + "print(f\"Dif: {def_if}\")\n", "\n", - "#Weighted mean total cropland area per pixel\n", - "def_total_cropland_area_per_pixel = (def_aceh['harvst_all_ha'] * def_aceh['prod_t']).dropna().sum() /def_aceh['prod_t'].sum()\n", - "print(f'Mean cropland area: {def_total_cropland_area_per_pixel}')\n", + "# Weighted mean total cropland area per pixel\n", + "def_total_cropland_area_per_pixel = (\n", + " def_aceh[\"harvst_all_ha\"] * def_aceh[\"prod_t\"]\n", + ").dropna().sum() / def_aceh[\"prod_t\"].sum()\n", + "print(f\"Mean cropland area: {def_total_cropland_area_per_pixel}\")\n", "\n", "def_impact_aceh_2 = (def_if * land_impact_aceh) / def_total_cropland_area_per_pixel\n", - "print(f'Revised forest loss risk:{def_impact_aceh_2} ha')\n" + "print(f\"Revised forest loss risk:{def_impact_aceh_2} ha\")" ] }, { @@ -1763,10 +1832,10 @@ } ], "source": [ - "def_aceh['land_impact_ha'] = (def_aceh['prod_t']*land_impact_aceh) / def_aceh['prod_t'].sum()\n", - "def_aceh['def_impact_ha'] = (def_aceh['prod_t']*def_impact_aceh) / def_aceh['prod_t'].sum()\n", - "def_aceh = def_aceh.set_geometry('geometry')\n", - "def_aceh.to_file('../../datasets/raw/methodology_results/update/Aceh_h3_res6_impact_v1_kernel.shp')\n", + "def_aceh[\"land_impact_ha\"] = (def_aceh[\"prod_t\"] * land_impact_aceh) / def_aceh[\"prod_t\"].sum()\n", + "def_aceh[\"def_impact_ha\"] = (def_aceh[\"prod_t\"] * def_impact_aceh) / def_aceh[\"prod_t\"].sum()\n", + "def_aceh = def_aceh.set_geometry(\"geometry\")\n", + "def_aceh.to_file(\"../../datasets/raw/methodology_results/update/Aceh_h3_res6_impact_v1_kernel.shp\")\n", "\n", "def_aceh.head()" ] diff --git a/data/notebooks/Lab/11_woodpulp_and_sateligence_preprocessing.ipynb b/data/notebooks/Lab/11_woodpulp_and_sateligence_preprocessing.ipynb index bb2d186a3..2fa88fdc6 100644 --- a/data/notebooks/Lab/11_woodpulp_and_sateligence_preprocessing.ipynb +++ b/data/notebooks/Lab/11_woodpulp_and_sateligence_preprocessing.ipynb @@ -9,22 +9,15 @@ }, "outputs": [], "source": [ - "from collections import namedtuple\n", - "import math\n", - "import os\n", "from pathlib import Path\n", "\n", - "\n", - "import numpy as np\n", "import rasterio as rio\n", "import rioxarray\n", "from affine import Affine\n", - "from h3ronpy import raster\n", "from h3ronpy.raster import nearest_h3_resolution, raster_to_dataframe\n", "from rasterio.coords import BoundingBox\n", "from rasterio.enums import Resampling\n", - "from rasterio.plot import show\n", - "from shapely.geometry import Polygon" + "from rasterio.plot import show" ] }, { @@ -106,14 +99,21 @@ "metadata": {}, "outputs": [], "source": [ - "def find_h3_res_best_fit(transform: Affine, shape: tuple[int, int], bounds: BoundingBox, resolution: int) -> list:\n", + "def 
find_h3_res_best_fit(\n", + " transform: Affine, shape: tuple[int, int], bounds: BoundingBox, resolution: int\n", + ") -> list:\n", " result = []\n", " for scale_factor in (x for x in range(1, 400)):\n", " x_pix_size = transform.a * scale_factor\n", " y_pix_size = transform.e * scale_factor\n", "\n", - " shape = (int((bounds.right - bounds.left) / x_pix_size), int((bounds.bottom - bounds.top) / y_pix_size))\n", - " new_trans = Affine(x_pix_size, transform.b, transform.c, transform.d, y_pix_size, transform.f)\n", + " shape = (\n", + " int((bounds.right - bounds.left) / x_pix_size),\n", + " int((bounds.bottom - bounds.top) / y_pix_size),\n", + " )\n", + " new_trans = Affine(\n", + " x_pix_size, transform.b, transform.c, transform.d, y_pix_size, transform.f\n", + " )\n", "\n", " h3_res = nearest_h3_resolution(shape, new_trans, search_mode=\"min_diff\")\n", " result.append((scale_factor, x_pix_size, shape, h3_res))\n", @@ -601,10 +601,17 @@ } ], "source": [ - "with rio.open(\"../../h3_data_importer/data/woodpulp/gfw_plantations_woodpulp_harvest_ha_res.tif\") as src:\n", + "with rio.open(\n", + " \"../../h3_data_importer/data/woodpulp/gfw_plantations_woodpulp_harvest_ha_res.tif\"\n", + ") as src:\n", " show(src, interpolation=\"none\")\n", " df = raster_to_dataframe(\n", - " src.read(1), src.transform, h3_resolution=6, nodata_value=src.nodata, compacted=False, geo=True\n", + " src.read(1),\n", + " src.transform,\n", + " h3_resolution=6,\n", + " nodata_value=src.nodata,\n", + " compacted=False,\n", + " geo=True,\n", " )" ] }, @@ -654,7 +661,10 @@ "metadata": {}, "outputs": [], "source": [ - "df.to_file(\"../../h3_data_importer/data/woodpulp/gfw_plantations_woodpulp_harvest_ha_h3.geojson\", driver=\"GeoJSON\")" + "df.to_file(\n", + " \"../../h3_data_importer/data/woodpulp/gfw_plantations_woodpulp_harvest_ha_h3.geojson\",\n", + " driver=\"GeoJSON\",\n", + ")" ] }, { @@ -684,7 +694,9 @@ "metadata": {}, "outputs": [], "source": [ - "with rio.open(\"../../h3_data_importer/data/satelligence/Deforestation_Masked_2016-2022-10-01.tif\") as src:\n", + "with rio.open(\n", + " \"../../h3_data_importer/data/satelligence/Deforestation_Masked_2016-2022-10-01.tif\"\n", + ") as src:\n", " target = find_h3_res_best_fit(src.transform, src.shape, src.bounds, 6)" ] }, @@ -1236,7 +1248,9 @@ } ], "source": [ - "deforest_risk = deforest_risk.rio.reproject(\"EPSG:4326\", resolution=(0.00075, 0.00075), resampling=Resampling.sum)\n", + "deforest_risk = deforest_risk.rio.reproject(\n", + " \"EPSG:4326\", resolution=(0.00075, 0.00075), resampling=Resampling.sum\n", + ")\n", "deforest_risk" ] }, @@ -1247,7 +1261,7 @@ "metadata": {}, "outputs": [], "source": [ - "deforest_risk.rio.to_raster('../../h3_data_importer/data/satelligence/Deforestation_risk.tif')" + "deforest_risk.rio.to_raster(\"../../h3_data_importer/data/satelligence/Deforestation_risk.tif\")" ] }, { diff --git a/data/notebooks/Lab/1_biodiversity_indicator.ipynb b/data/notebooks/Lab/1_biodiversity_indicator.ipynb index e172e2df7..f1728ca7c 100644 --- a/data/notebooks/Lab/1_biodiversity_indicator.ipynb +++ b/data/notebooks/Lab/1_biodiversity_indicator.ipynb @@ -71,15 +71,14 @@ "outputs": [], "source": [ "## import libraries\n", + "import time\n", + "\n", "import geopandas as gpd\n", + "import matplotlib.pyplot as plt\n", "import pandas as pd\n", - "\n", "import rasterio as rio\n", "import rasterio.plot\n", - "import matplotlib.pyplot as plt\n", - "from rasterstats import zonal_stats\n", - "\n", - "import time" + "from rasterstats import zonal_stats" ] }, { @@ 
-315,7 +314,11 @@ } ], "source": [ - "df = pd.read_excel(r'../../datasets/raw/biodiversity_indicators/Ch6 PSLregional v01.xlsx', sheet_name='Transformation_Ecoregion', header=[3])\n", + "df = pd.read_excel(\n", + " r\"../../datasets/raw/biodiversity_indicators/Ch6 PSLregional v01.xlsx\",\n", + " sheet_name=\"Transformation_Ecoregion\",\n", + " header=[3],\n", + ")\n", "df.head()" ] }, @@ -408,7 +411,7 @@ ], "source": [ "## select for the moment annual crops\n", - "pdf_annual_crops =df[['eco_code', 'Median', 'lower 95%', 'upper 95%']]\n", + "pdf_annual_crops = df[[\"eco_code\", \"Median\", \"lower 95%\", \"upper 95%\"]]\n", "pdf_annual_crops.head()" ] }, @@ -636,7 +639,7 @@ ], "source": [ "## import the ecoregions data\n", - "ecoregions = gpd.read_file('../../datasets/raw/biodiversity_indicators/official/wwf_terr_ecos.shp')\n", + "ecoregions = gpd.read_file(\"../../datasets/raw/biodiversity_indicators/official/wwf_terr_ecos.shp\")\n", "ecoregions.head()" ] }, @@ -863,11 +866,7 @@ } ], "source": [ - "ecoregions_PDF = pd.merge(\n", - " pdf_annual_crops,\n", - " ecoregions,\n", - " how= 'inner',\n", - " on='eco_code')\n", + "ecoregions_PDF = pd.merge(pdf_annual_crops, ecoregions, how=\"inner\", on=\"eco_code\")\n", "ecoregions_PDF.head()" ] }, @@ -878,7 +877,7 @@ "metadata": {}, "outputs": [], "source": [ - "ecoregions_PDF = ecoregions_PDF.set_geometry('geometry')" + "ecoregions_PDF = ecoregions_PDF.set_geometry(\"geometry\")" ] }, { @@ -890,8 +889,8 @@ "source": [ "# export\n", "ecoregions_PDF.to_file(\n", - " '../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors.shp',\n", - " driver='ESRI Shapefile'\n", + " \"../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors.shp\",\n", + " driver=\"ESRI Shapefile\",\n", ")" ] }, @@ -1011,15 +1010,24 @@ } ], "source": [ - "#check calculated risk map\n", - "with rio.open('../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326.tif') as src:\n", + "# check calculated risk map\n", + "with rio.open(\n", + " \"../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326.tif\"\n", + ") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " #ax.set_ylim((-5,40))\n", - " #ax.set_xlim((60,100))\n", - " rio.plot.show(dat, vmin=2.8999999152285e-14, vmax=2.9376220100729e-12, cmap='Blues', ax=ax, transform=src.transform)\n", - " #test_gdf.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n", - " ax.set_title('Regional taxa aggregated characetrization factors by ecoregion (PDF/m2 *yr)')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " # ax.set_ylim((-5,40))\n", + " # ax.set_xlim((60,100))\n", + " rio.plot.show(\n", + " dat,\n", + " vmin=2.8999999152285e-14,\n", + " vmax=2.9376220100729e-12,\n", + " cmap=\"Blues\",\n", + " ax=ax,\n", + " transform=src.transform,\n", + " )\n", + " # test_gdf.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n", + " ax.set_title(\"Regional taxa aggregated characetrization factors by ecoregion (PDF/m2 *yr)\")" ] }, { @@ -1056,7 +1064,7 @@ } ], "source": [ - "#clip taxa aggregated characetrisation factors to deforestation extent \n", + "# clip taxa aggregated characetrisation factors to deforestation extent\n", "!gdal_translate -projwin 100.024523992 2.91915145 103.814423992 -1.12192855 -of GTiff '../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326.tif' 
'../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326_clipped.tif'" ] }, @@ -1091,8 +1099,10 @@ "metadata": {}, "outputs": [], "source": [ - "taxa_cf_4326 = '../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326_30m.tif'\n", - "deforestation_4326 = '../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018.tif'" + "taxa_cf_4326 = \"../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326_30m.tif\"\n", + "deforestation_4326 = (\n", + " \"../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018.tif\"\n", + ")" ] }, { @@ -1352,17 +1362,26 @@ } ], "source": [ - "#generate a cog with the biodiversity risk map\n", - "#check calculated risk map\n", - "with rio.open('../../datasets/processed/biodiversity_indicators/biodiversity_risk_cotton_epsg4326_PDF.tif') as src:\n", + "# generate a cog with the biodiversity risk map\n", + "# check calculated risk map\n", + "with rio.open(\n", + " \"../../datasets/processed/biodiversity_indicators/biodiversity_risk_cotton_epsg4326_PDF.tif\"\n", + ") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " #ax.set_ylim((-5,40))\n", - " #ax.set_xlim((60,100))\n", - " rio.plot.show(dat, vmin=2.8999999152285e-14, vmax=1.1684577784499e-11, cmap='Oranges', ax=ax, transform=src.transform)\n", - " #test_gdf.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n", - " ax.set_title('Biodiversity loss due to land use change risk map (PDF/yr)')\n", - " \n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " # ax.set_ylim((-5,40))\n", + " # ax.set_xlim((60,100))\n", + " rio.plot.show(\n", + " dat,\n", + " vmin=2.8999999152285e-14,\n", + " vmax=1.1684577784499e-11,\n", + " cmap=\"Oranges\",\n", + " ax=ax,\n", + " transform=src.transform,\n", + " )\n", + " # test_gdf.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n", + " ax.set_title(\"Biodiversity loss due to land use change risk map (PDF/yr)\")\n", + "\n", " # Dark red shows no data information\n", " # Beige shows 0 risk" ] @@ -1529,8 +1548,8 @@ } ], "source": [ - "#import test data and filter by commodity - cotton (as the deforestation risk is for cotton) and indonesia (as the sample data es for indonesia)\n", - "gdf = gpd.read_file('../../datasets/processed/user_data/located_lg_data_polygon_v2.shp')\n", + "# import test data and filter by commodity - cotton (as the deforestation risk is for cotton) and indonesia (as the sample data es for indonesia)\n", + "gdf = gpd.read_file(\"../../datasets/processed/user_data/located_lg_data_polygon_v2.shp\")\n", "gdf.head()" ] }, @@ -1606,7 +1625,7 @@ ], "source": [ "# lest assume that the risk map is for rubber - we will need to update this later on\n", - "gdf = gdf.loc[(gdf['Material']=='Rubber') & (gdf['Country']=='Indonesia')]\n", + "gdf = gdf.loc[(gdf[\"Material\"] == \"Rubber\") & (gdf[\"Country\"] == \"Indonesia\")]\n", "gdf" ] }, @@ -1617,8 +1636,12 @@ "metadata": {}, "outputs": [], "source": [ - "yield_rubber = '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare.tif'\n", - "harvest_portion_rubber = '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction.tif'" + "yield_rubber = (\n", + " \"../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare.tif\"\n", + ")\n", + "harvest_portion_rubber = (\n", + " 
\"../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction.tif\"\n", + ")" ] }, { @@ -1628,9 +1651,11 @@ "metadata": {}, "outputs": [], "source": [ - "#save test location\n", - "gdf.to_file('../../datasets/raw/input_data_test/indonesia_test_shape.shp',\n", - " driver='ESRI Shapefile',)" + "# save test location\n", + "gdf.to_file(\n", + " \"../../datasets/raw/input_data_test/indonesia_test_shape.shp\",\n", + " driver=\"ESRI Shapefile\",\n", + ")" ] }, { @@ -1668,7 +1693,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "## add projection - same as the other ones for the calculations\n", "!gdal_edit.py -a_srs EPSG:4326 '../../datasets/raw/input_data_test/indonesia_raster_volume.tif'" ] @@ -1690,7 +1714,7 @@ ], "source": [ "## clip data to deforestation extent\n", - "#clip harvest area fraction to deforestation extent \n", + "# clip harvest area fraction to deforestation extent\n", "!gdal_translate -projwin 100.024523992 2.91915145 103.814423992 -1.12192855 -of GTiff '../../datasets/raw/input_data_test/indonesia_raster_volume.tif' '../../datasets/raw/input_data_test/indonesia_raster_volume_4326_clipped.tif'" ] }, @@ -1713,7 +1737,7 @@ ], "source": [ "## downsample to deforestation resolution\n", - "!gdalwarp -s_srs EPSG:4326 -tr 0.000269494417976 0.000269494417976 -r near -of GTiff '../../datasets/raw/input_data_test/indonesia_raster_volume_4326_clipped.tif' '../../datasets/raw/input_data_test/indonesia_raster_volume_4326_30m.tif'\n" + "!gdalwarp -s_srs EPSG:4326 -tr 0.000269494417976 0.000269494417976 -r near -of GTiff '../../datasets/raw/input_data_test/indonesia_raster_volume_4326_clipped.tif' '../../datasets/raw/input_data_test/indonesia_raster_volume_4326_30m.tif'" ] }, { @@ -1734,7 +1758,7 @@ } ], "source": [ - "#reproject raster volume from epsg4326 to espg3857\n", + "# reproject raster volume from epsg4326 to espg3857\n", "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 30 30 -r near -of GTiff '../../datasets/raw/input_data_test/indonesia_raster_volume_4326_30m.tif' '../../datasets/raw/input_data_test/indonesia_raster_volume_3857_30m.tif'" ] }, @@ -1762,7 +1786,7 @@ } ], "source": [ - "#clip harvest area fraction to deforestation extent \n", + "# clip harvest area fraction to deforestation extent\n", "!gdal_translate -projwin 100.024523992 2.91915145 103.814423992 -1.12192855 -of GTiff $harvest_portion_rubber '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_4326_clipped.tif'" ] }, @@ -1784,7 +1808,7 @@ "source": [ "## downsample harvest area fraction -as it's area independent we can downsample the values into smaller pixel sizes\n", "# downsample the clipped data\n", - "!gdalwarp -s_srs EPSG:4326 -tr 0.000269494417976 0.000269494417976 -r near -of GTiff '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_4326_clipped.tif' '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_4326_30m.tif'\n" + "!gdalwarp -s_srs EPSG:4326 -tr 0.000269494417976 0.000269494417976 -r near -of GTiff '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_4326_clipped.tif' '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_4326_30m.tif'" ] }, { @@ -1939,7 +1963,7 @@ } ], "source": [ - "#generate raster with pixel area raster\n", + "# generate raster with pixel area raster\n", "# reclasifies the raster into 0 and pixel area being the pixel area just on thise locations with harvest area fraction\n", 
"!gdal_calc.py -A '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_3857_30m.tif' --outfile='../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/pixel_area_rubber_raster_epsg3857.tif' --calc=\"(A > 0) * (30*30)\"" ] @@ -2007,7 +2031,7 @@ ], "source": [ "gdf = gdf.set_crs(\"EPSG:4326\")\n", - "print(f'projection of user data is: {gdf.crs}')" + "print(f\"projection of user data is: {gdf.crs}\")" ] }, { @@ -2017,8 +2041,8 @@ "metadata": {}, "outputs": [], "source": [ - "#reproject the gdf to epsg3857 for the zonal statistics\n", - "#reproject to epsg3857\n", + "# reproject the gdf to epsg3857 for the zonal statistics\n", + "# reproject to epsg3857\n", "gdf = gdf.to_crs(\"EPSG:3857\")" ] }, @@ -2029,7 +2053,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf.to_file('../../datasets/processed/user_data/indonesia_test_3857.shp')" + "gdf.to_file(\"../../datasets/processed/user_data/indonesia_test_3857.shp\")" ] }, { @@ -2137,7 +2161,7 @@ } ], "source": [ - "gdf = gpd.read_file('../../datasets/raw/input_data_test/indonesia_test_shape_clip.shp')\n", + "gdf = gpd.read_file(\"../../datasets/raw/input_data_test/indonesia_test_shape_clip.shp\")\n", "gdf" ] }, @@ -2156,13 +2180,12 @@ } ], "source": [ - "#zonal stats in india to get the sum of all fraction harvest area\n", - "total_harves_area_rubber = '../../datasets/raw/probability_map/area_total_rubber_raster_epsg3857.tif'\n", + "# zonal stats in india to get the sum of all fraction harvest area\n", + "total_harves_area_rubber = (\n", + " \"../../datasets/raw/probability_map/area_total_rubber_raster_epsg3857.tif\"\n", + ")\n", "start_time = time.time()\n", - "zs_indonesia_test = zonal_stats(\n", - " gdf,\n", - " total_harves_area_rubber,\n", - " stats=\"sum\")\n", + "zs_indonesia_test = zonal_stats(gdf, total_harves_area_rubber, stats=\"sum\")\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -2181,7 +2204,7 @@ } ], "source": [ - "print(f' The total rubber harvest area in indonessua is :', {zs_indonesia_test[0]['sum']}, 'm2')" + "print(\" The total rubber harvest area in indonessua is :\", {zs_indonesia_test[0][\"sum\"]}, \"m2\")" ] }, { @@ -2263,7 +2286,7 @@ ], "source": [ "## ad field to gdf\n", - "gdf['Total_af'] = zs_indonesia_test[0]['sum']\n", + "gdf[\"Total_af\"] = zs_indonesia_test[0][\"sum\"]\n", "gdf" ] }, @@ -2274,7 +2297,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf.to_file('../../datasets/processed/user_data/indonesia_test_3857.shp')" + "gdf.to_file(\"../../datasets/processed/user_data/indonesia_test_3857.shp\")" ] }, { @@ -2293,7 +2316,7 @@ ], "source": [ "## generate a raster with same extent as the other ones with this total area fraction value\n", - "!gdal_rasterize -l indonesia_test_3857 -a Total_af -tr 30 30 -a_nodata 0.0 -ot Float32 -of GTiff '../../datasets/processed/user_data/indonesia_test_3857.shp' '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af.tif'\n" + "!gdal_rasterize -l indonesia_test_3857 -a Total_af -tr 30 30 -a_nodata 0.0 -ot Float32 -of GTiff '../../datasets/processed/user_data/indonesia_test_3857.shp' '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af.tif'" ] }, { @@ -2331,7 +2354,7 @@ } ], "source": [ - "#clip harvest area fraction to deforestation extent \n", + "# clip harvest area fraction to deforestation extent\n", "!gdal_translate -projwin 100.024523992 2.91915145 103.814423992 -1.12192855 -of GTiff '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare.tif' 
'../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_4326_clipped.tif'" ] }, @@ -2353,7 +2376,7 @@ "source": [ "## downsample harvest area fraction -as it's area independent we can downsample the values into smaller pixel sizes\n", "# downsample the clipped data\n", - "!gdalwarp -s_srs EPSG:4326 -tr 0.000269494417976 0.000269494417976 -r near -of GTiff '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_4326_clipped.tif' '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_4326_30m.tif'\n" + "!gdalwarp -s_srs EPSG:4326 -tr 0.000269494417976 0.000269494417976 -r near -of GTiff '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_4326_clipped.tif' '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_4326_30m.tif'" ] }, { @@ -2372,8 +2395,8 @@ } ], "source": [ - "#reproject yield from epsg4326 to epsg3857\n", - "#reproject raster volume from epsg4326 to espg3857\n", + "# reproject yield from epsg4326 to epsg3857\n", + "# reproject raster volume from epsg4326 to espg3857\n", "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 30 30 -r near -of GTiff '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_4326_30m.tif' '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_3857_30m.tif'" ] }, @@ -2639,8 +2662,8 @@ } ], "source": [ - "#fix extent od total area fraction raster\n", - "!gdal_translate -projwin 11131949.079 334111.171 11549399.079 -111328.829 -of GTiff '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af.tif' '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af_new_extent.tif'\n" + "# fix extent od total area fraction raster\n", + "!gdal_translate -projwin 11131949.079 334111.171 11549399.079 -111328.829 -of GTiff '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af.tif' '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af_new_extent.tif'" ] }, { @@ -2774,7 +2797,7 @@ } ], "source": [ - "#generate raster with pixel area raster\n", + "# generate raster with pixel area raster\n", "# reclasifies the raster into 0 and pixel area being the pixel area just on thise locations with harvest area fraction\n", "!gdal_calc.py -A '../../datasets/raw/input_data_test/indonesia_raster_volume_3857_30m.tif' -B '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_3857_30m.tif' -C '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af_new_extent.tif' -D '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_3857_30m.tif' --outfile='../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif' --calc=\"(A*B)/(C*D)\"" ] @@ -2887,13 +2910,22 @@ } ], "source": [ - "with rio.open('../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif') as src:\n", + "with rio.open(\n", + " \"../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif\"\n", + ") as src:\n", " image_array = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((-111328.8286,334111.1714))\n", - " ax.set_xlim((1.113195e+07,1.154940e+07))\n", - " rio.plot.show(image_array, vmin=7.6509659718837e-11, vmax=3.2353862778438e-08, cmap='Oranges', ax=ax, transform=src.transform)\n", - " ax.set_title('Geospatial responsibility - indonesia test')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", 
+ " ax.set_ylim((-111328.8286, 334111.1714))\n", + " ax.set_xlim((1.113195e07, 1.154940e07))\n", + " rio.plot.show(\n", + " image_array,\n", + " vmin=7.6509659718837e-11,\n", + " vmax=3.2353862778438e-08,\n", + " cmap=\"Oranges\",\n", + " ax=ax,\n", + " transform=src.transform,\n", + " )\n", + " ax.set_title(\"Geospatial responsibility - indonesia test\")" ] }, { @@ -3072,8 +3104,8 @@ } ], "source": [ - "#reproject biodiversity risk map from epsg4326 to epsg3857\n", - "#reproject raster volume from epsg4326 to espg3857\n", + "# reproject biodiversity risk map from epsg4326 to epsg3857\n", + "# reproject raster volume from epsg4326 to espg3857\n", "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 30 30 -r near -of GTiff '../../datasets/processed/biodiversity_indicators/biodiversity_risk_cotton_epsg4326_PDF.tif' '../../datasets/processed/biodiversity_indicators/biodiversity_risk_cotton_epsg3857_PDF.tif'" ] }, @@ -3231,13 +3263,17 @@ } ], "source": [ - "with rio.open('../../datasets/processed/biodiversity_indicators/biodiversity_loss_dueTo_landusechange_3857_30m.tif') as src:\n", + "with rio.open(\n", + " \"../../datasets/processed/biodiversity_indicators/biodiversity_loss_dueTo_landusechange_3857_30m.tif\"\n", + ") as src:\n", " image_array = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((-111328.8286,334111.1714))\n", - " ax.set_xlim((1.113195e+07,1.154940e+07))\n", - " rio.plot.show(image_array, vmin=0, vmax=3.6318996466515e+28, cmap='Oranges', ax=ax, transform=src.transform)\n", - " ax.set_title('BIodiversity impact - indonesia test')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((-111328.8286, 334111.1714))\n", + " ax.set_xlim((1.113195e07, 1.154940e07))\n", + " rio.plot.show(\n", + " image_array, vmin=0, vmax=3.6318996466515e28, cmap=\"Oranges\", ax=ax, transform=src.transform\n", + " )\n", + " ax.set_title(\"BIodiversity impact - indonesia test\")" ] }, { diff --git a/data/notebooks/Lab/2_water_use.ipynb b/data/notebooks/Lab/2_water_use.ipynb index 77094cb66..dab4473f5 100644 --- a/data/notebooks/Lab/2_water_use.ipynb +++ b/data/notebooks/Lab/2_water_use.ipynb @@ -81,32 +81,16 @@ "metadata": {}, "outputs": [], "source": [ + "import time\n", + "\n", "import geopandas as gpd\n", - "import pandas as pd\n", - "from shapely.geometry import Point\n", + "import matplotlib.pyplot as plt\n", "import rasterio as rio\n", "import rasterio.plot\n", - "import matplotlib.pyplot as plt\n", "from rasterio.plot import show_hist\n", - "import time\n", - "from rasterstats import gen_zonal_stats, gen_point_query\n", - "from shapely.geometry import shape, mapping\n", - "import folium\n", - "from rasterstats import gen_zonal_stats, gen_point_query\n", - "import h3\n", "from rasterstats import zonal_stats" ] }, - { - "cell_type": "code", - "execution_count": 87, - "id": "1ef1fd14", - "metadata": {}, - "outputs": [], - "source": [ - "from processing.geolocating_data import GeolocateAddress" - ] - }, { "cell_type": "markdown", "id": "baecd78d", @@ -138,8 +122,10 @@ "metadata": {}, "outputs": [], "source": [ - "blwf_path = '../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/hdr.adf'\n", - "ha_fraction_path = '../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857.tif'" + "blwf_path = (\n", + " \"../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/hdr.adf\"\n", + ")\n", + "ha_fraction_path = \"../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857.tif\"" ] }, { @@ -240,7 +226,7 @@ 
} ], "source": [ - "#explore datasets info for calculation - the three raster need to have the same extent and projection\n", + "# explore datasets info for calculation - the three raster need to have the same extent and projection\n", "!gdalinfo $blwf_path" ] }, @@ -481,7 +467,7 @@ ], "source": [ "pixel_area = 12051.131160772874864 * 12051.131160772874864\n", - "print(f'The pixel area of the reprojected raster is: {pixel_area} m2')" + "print(f\"The pixel area of the reprojected raster is: {pixel_area} m2\")" ] }, { @@ -500,7 +486,7 @@ ], "source": [ "# renormalised back by the pixel area\n", - "!gdal_calc.py -A '../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/wfbl_cotton_epsg3857.tif' --outfile='../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/wfbl_cotton_epsg3857_normalised.tif' --calc=\"A/145229762.254151\"\n" + "!gdal_calc.py -A '../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/wfbl_cotton_epsg3857.tif' --outfile='../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/wfbl_cotton_epsg3857_normalised.tif' --calc=\"A/145229762.254151\"" ] }, { @@ -617,7 +603,7 @@ ], "source": [ "pixel_area = 0.083333333333333 * 0.083333333333333\n", - "print(f'Pixel area in degrees: {pixel_area}')" + "print(f\"Pixel area in degrees: {pixel_area}\")" ] }, { @@ -868,8 +854,8 @@ } ], "source": [ - "# explore gdal info for nearest raster \n", - "!gdalinfo -stats -hist '../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/test_reprojections/wfbl_cotton_epsg3857_near.tif'\n" + "# explore gdal info for nearest raster\n", + "!gdalinfo -stats -hist '../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/test_reprojections/wfbl_cotton_epsg3857_near.tif'" ] }, { @@ -1020,8 +1006,10 @@ "metadata": {}, "outputs": [], "source": [ - "blwf_cotton = '../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/test_reprojections/wfbl_cotton_epsg3857_near.tif'\n", - "harvest_area_portion = '../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857_new_extent.tif'" + "blwf_cotton = \"../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/test_reprojections/wfbl_cotton_epsg3857_near.tif\"\n", + "harvest_area_portion = (\n", + " \"../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857_new_extent.tif\"\n", + ")" ] }, { @@ -1150,14 +1138,21 @@ } ], "source": [ - "with rio.open('../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v3.tif') as src:\n", + "with rio.open(\"../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v3.tif\") as src:\n", " image_array = src.read(1)\n", " msk = src.read_masks()\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " rio.plot.show(image_array, vmin=4.4836601744862e-05, vmax=0.14, cmap='Reds' , ax=ax, transform=src.transform)\n", - " ax.set_title('Geospatial responsibility - test location')\n", - " \n", - " #the dark red shows no data" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " rio.plot.show(\n", + " image_array,\n", + " vmin=4.4836601744862e-05,\n", + " vmax=0.14,\n", + " cmap=\"Reds\",\n", + " ax=ax,\n", + " transform=src.transform,\n", + " )\n", + " ax.set_title(\"Geospatial responsibility - test location\")\n", + "\n", + " # the dark red shows no data" ] }, { @@ -1181,8 +1176,10 @@ "metadata": {}, "outputs": [], "source": [ - "water_risk = 
'../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v3.tif'\n", - "probability_area ='../../datasets/processed/probability_map/purchase_area_distribution_cotton_epsg3857.tif'\n" + "water_risk = \"../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v3.tif\"\n", + "probability_area = (\n", + " \"../../datasets/processed/probability_map/purchase_area_distribution_cotton_epsg3857.tif\"\n", + ")" ] }, { @@ -1468,17 +1465,21 @@ } ], "source": [ - "#check calculated risk map\n", - "with rio.open( '../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857.tif') as src:\n", + "# check calculated risk map\n", + "with rio.open(\n", + " \"../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857.tif\"\n", + ") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((695174.093781,4.255931e+06))\n", - " ax.set_xlim((7.582124e+06,1.084202e+07))\n", - " rio.plot.show(dat, vmin=0, vmax=7.9023620167261e-09, cmap='Oranges', ax=ax, transform=src.transform)\n", - " #gdf_india.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n", - " ax.set_title('Unsustainable water use in India - test location')\n", - " \n", - " #dark red shows no data values" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((695174.093781, 4.255931e06))\n", + " ax.set_xlim((7.582124e06, 1.084202e07))\n", + " rio.plot.show(\n", + " dat, vmin=0, vmax=7.9023620167261e-09, cmap=\"Oranges\", ax=ax, transform=src.transform\n", + " )\n", + " # gdf_india.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n", + " ax.set_title(\"Unsustainable water use in India - test location\")\n", + "\n", + " # dark red shows no data values" ] }, { @@ -1502,11 +1503,14 @@ ], "source": [ "from rasterio.plot import show_hist\n", - "with rio.open( '../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857.tif') as src:\n", + "\n", + "with rio.open(\n", + " \"../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857.tif\"\n", + ") as src:\n", " dat = src.read(1)\n", " show_hist(\n", - " src, bins=10, lw=0, stacked=False, alpha=0.3,\n", - " histtype='stepfilled', title=\"Histogram\")" + " src, bins=10, lw=0, stacked=False, alpha=0.3, histtype=\"stepfilled\", title=\"Histogram\"\n", + " )" ] }, { @@ -1582,7 +1586,7 @@ } ], "source": [ - "test_location = gpd.read_file('../../datasets/raw/probability_map/test_location_epsg3857.shp')\n", + "test_location = gpd.read_file(\"../../datasets/raw/probability_map/test_location_epsg3857.shp\")\n", "test_location" ] }, @@ -1784,7 +1788,7 @@ "source": [ "## reproject raster to epsg3857\n", "# reproject the blue water footprint from epsg4326 to epsg3857\n", - "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 12051.131160772875 12051.131160772875 -r near -of GTiff '../../datasets/processed/water_indicators/water_risk_cotton_epsg4326.tif' '../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v4.tif'\n" + "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 12051.131160772875 12051.131160772875 -r near -of GTiff '../../datasets/processed/water_indicators/water_risk_cotton_epsg4326.tif' '../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v4.tif'" ] }, { @@ -1820,7 +1824,7 @@ ], "source": [ "## calculate matric using new reprojected layer\n", - "!gdal_calc.py -A '../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v4.tif' -B 
'../../datasets/processed/probability_map/purchase_area_distribution_cotton_epsg3857_new_extent.tif' --outfile='../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857_v2.tif' --calc=\"A*B\"\n" + "!gdal_calc.py -A '../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v4.tif' -B '../../datasets/processed/probability_map/purchase_area_distribution_cotton_epsg3857_new_extent.tif' --outfile='../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857_v2.tif' --calc=\"A*B\"" ] }, { @@ -1847,14 +1851,15 @@ ], "source": [ "## calculate zonal statistics in test location\n", - "water_metric_v1 = '../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857.tif'\n", - "water_metric_v2 = '../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857_v2.tif'\n", + "water_metric_v1 = (\n", + " \"../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857.tif\"\n", + ")\n", + "water_metric_v2 = (\n", + " \"../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857_v2.tif\"\n", + ")\n", "\n", "start_time = time.time()\n", - "zs_india_test_v1 = zonal_stats(\n", - " test_location,\n", - " water_metric_v1,\n", - " stats=\"sum\")\n", + "zs_india_test_v1 = zonal_stats(test_location, water_metric_v1, stats=\"sum\")\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1865,7 +1870,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(f' water impact v1:', {zs_india_test[0]['sum']}, 'm2')" + "print(\" water impact v1:\", {zs_india_test[0][\"sum\"]}, \"m2\")" ] }, { @@ -1884,10 +1889,7 @@ ], "source": [ "start_time = time.time()\n", - "zs_india_test_v2 = zonal_stats(\n", - " test_location,\n", - " water_metric_v2,\n", - " stats=\"sum\")\n", + "zs_india_test_v2 = zonal_stats(test_location, water_metric_v2, stats=\"sum\")\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, diff --git a/data/notebooks/Lab/3_deforestation_risk.ipynb b/data/notebooks/Lab/3_deforestation_risk.ipynb index 8471f9716..9bf2cac56 100644 --- a/data/notebooks/Lab/3_deforestation_risk.ipynb +++ b/data/notebooks/Lab/3_deforestation_risk.ipynb @@ -34,30 +34,17 @@ "metadata": {}, "outputs": [], "source": [ + "import os\n", + "import time\n", + "\n", "import geopandas as gpd\n", + "import h3\n", + "import matplotlib.pyplot as plt\n", "import pandas as pd\n", - "from shapely.geometry import Point\n", "import rasterio as rio\n", "import rasterio.plot\n", - "import matplotlib.pyplot as plt\n", "from rasterio.plot import show_hist\n", - "import time\n", - "from rasterstats import gen_zonal_stats, gen_point_query\n", - "from shapely.geometry import shape, mapping\n", - "import folium\n", - "from rasterstats import gen_zonal_stats, gen_point_query\n", - "import h3\n", - "import os\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "c0b4cbb2", - "metadata": {}, - "outputs": [], - "source": [ - "from processing.geolocating_data import GeolocateAddress" + "from rasterstats import gen_zonal_stats" ] }, { @@ -104,7 +91,7 @@ } ], "source": [ - "input_path = '../../datasets/raw/satelligence_sample_data'\n", + "input_path = \"../../datasets/raw/satelligence_sample_data\"\n", "\n", "os.listdir(input_path)" ] @@ -116,8 +103,8 @@ "metadata": {}, "outputs": [], "source": [ - "baseline_path = input_path + '/forest_and_plantation_baseline'\n", - "change_path = input_path + '/change_detection'" + "baseline_path = input_path + 
\"/forest_and_plantation_baseline\"\n", + "change_path = input_path + \"/change_detection\"" ] }, { @@ -142,7 +129,7 @@ "source": [ "# Baseline Forest\n", "\n", - "files = [f\"/{f}\" for f in os.listdir(baseline_path) if '.tif' in f]\n", + "files = [f\"/{f}\" for f in os.listdir(baseline_path) if \".tif\" in f]\n", "files" ] }, @@ -209,7 +196,7 @@ "source": [ "file = baseline_path + files[1]\n", "\n", - "#explore datasets info for calculation - the three raster need to have the same extent and projection\n", + "# explore datasets info for calculation - the three raster need to have the same extent and projection\n", "baseline_info = !gdalinfo $file\n", "baseline_info" ] @@ -222,7 +209,8 @@ "outputs": [], "source": [ "from matplotlib.colors import ListedColormap\n", - "custom_cmap = ListedColormap(['#ffffff','#9bff8f','#73a367'])" + "\n", + "custom_cmap = ListedColormap([\"#ffffff\", \"#9bff8f\", \"#73a367\"])" ] }, { @@ -243,20 +231,18 @@ } ], "source": [ - "#check 2018 baseline\n", + "# check 2018 baseline\n", "\n", "# Legend\n", - "# 0 Non forest \n", + "# 0 Non forest\n", "# 1 Forest\n", "# 2 Primary Forest\n", "\n", "with rio.open(file) as src:\n", " image_array = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", " rio.plot.show(image_array, vmin=0, vmax=2, cmap=custom_cmap, ax=ax, transform=src.transform)\n", - " ax.set_title('Baseline tree cover 2018')\n", - " \n", - " " + " ax.set_title(\"Baseline tree cover 2018\")" ] }, { @@ -570,7 +556,7 @@ "source": [ "file = baseline_path + files[2]\n", "\n", - "#explore datasets info for calculation - the three raster need to have the same extent and projection\n", + "# explore datasets info for calculation - the three raster need to have the same extent and projection\n", "baseline_info = !gdalinfo $file\n", "baseline_info" ] @@ -593,9 +579,9 @@ } ], "source": [ - "#check calculated risk map\n", + "# check calculated risk map\n", "\n", - "custom_cmap = ListedColormap([\"darkgreen\",\"#5eb342\",\"#3dd00d\",\"#ffd60e\",\"darkorange\"])\n", + "custom_cmap = ListedColormap([\"darkgreen\", \"#5eb342\", \"#3dd00d\", \"#ffd60e\", \"darkorange\"])\n", "\n", "# Legend\n", "# 1 Primary Forest\n", @@ -606,11 +592,9 @@ "\n", "with rio.open(file) as src:\n", " image_array = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", " rio.plot.show(image_array, vmin=0, vmax=5, cmap=custom_cmap, ax=ax, transform=src.transform)\n", - " ax.set_title('Baseline tree cover 2019')\n", - " \n", - " " + " ax.set_title(\"Baseline tree cover 2019\")" ] }, { @@ -633,7 +617,7 @@ "source": [ "# Forest change\n", "\n", - "files = [f\"/{f}\" for f in os.listdir(change_path) if '.tif' in f]\n", + "files = [f\"/{f}\" for f in os.listdir(change_path) if \".tif\" in f]\n", "files" ] }, @@ -691,7 +675,7 @@ "source": [ "file = change_path + files[0]\n", "\n", - "#explore datasets info for calculation - the three raster need to have the same extent and projection\n", + "# explore datasets info for calculation - the three raster need to have the same extent and projection\n", "change_info = !gdalinfo $file\n", "change_info" ] @@ -714,7 +698,7 @@ } ], "source": [ - "#check calculated risk map\n", + "# check calculated risk map\n", "\n", "# Values in format YYYY-jjj e.g. 
2019074 where jjj = julian day from 1-366\n", "\n", @@ -722,12 +706,10 @@ " image_array = src.read(1)\n", " meta = src.meta\n", " profile = src.profile\n", - " \n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " rio.plot.show(image_array, vmin=0, vmax=5, cmap='Blues', ax=ax, transform=src.transform)\n", - " ax.set_title('Change')\n", - " \n", - " " + "\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " rio.plot.show(image_array, vmin=0, vmax=5, cmap=\"Blues\", ax=ax, transform=src.transform)\n", + " ax.set_title(\"Change\")" ] }, { @@ -749,9 +731,7 @@ ], "source": [ "src = rio.open(file)\n", - "show_hist(\n", - " src, bins=50, lw=0.0, stacked=False, alpha=0.3,\n", - " histtype='stepfilled', title=\"Histogram\")" + "show_hist(src, bins=50, lw=0.0, stacked=False, alpha=0.3, histtype=\"stepfilled\", title=\"Histogram\")" ] }, { @@ -819,9 +799,9 @@ "source": [ "custom_cmap = ListedColormap([\"white\", \"#f69\"])\n", "\n", - "fig, ax = plt.subplots(figsize=[15,10])\n", + "fig, ax = plt.subplots(figsize=[15, 10])\n", "rio.plot.show(loss2018_array, vmin=0, vmax=1, cmap=custom_cmap, ax=ax, transform=src.transform)\n", - "ax.set_title('Change')" + "ax.set_title(\"Change\")" ] }, { @@ -854,7 +834,7 @@ } ], "source": [ - "output_path = '../../datasets/processed/'\n", + "output_path = \"../../datasets/processed/\"\n", "\n", "os.listdir(input_path)" ] @@ -896,10 +876,10 @@ "outputs": [], "source": [ "## Save as rio dataset\n", - "defor_path = output_path + 'sat_loss_2018.tif'\n", + "defor_path = output_path + \"sat_loss_2018.tif\"\n", "with rasterio.open(defor_path, \"w\", **profile) as dest:\n", " dest.write(loss2018_array, 1)\n", - " \n", + "\n", "loss2018_array = None" ] }, @@ -929,12 +909,10 @@ " image_array = src.read(1)\n", " meta = src.meta\n", " profile = src.profile\n", - " \n", - " fig, ax = plt.subplots(figsize=[15,10])\n", + "\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", " rio.plot.show(image_array, vmin=0, vmax=1, cmap=custom_cmap, ax=ax, transform=src.transform)\n", - " ax.set_title('Loss 2018')\n", - " \n", - " " + " ax.set_title(\"Loss 2018\")" ] }, { @@ -956,9 +934,7 @@ ], "source": [ "src = rio.open(defor_path)\n", - "show_hist(\n", - " src, bins=50, lw=0.0, stacked=False, alpha=0.3,\n", - " histtype='stepfilled', title=\"Histogram\")" + "show_hist(src, bins=50, lw=0.0, stacked=False, alpha=0.3, histtype=\"stepfilled\", title=\"Histogram\")" ] }, { @@ -993,7 +969,7 @@ "metadata": {}, "outputs": [], "source": [ - "ha_fraction_path = '../../datasets/raw/cotton_HarvestedAreaFraction.tif'" + "ha_fraction_path = \"../../datasets/raw/cotton_HarvestedAreaFraction.tif\"" ] }, { @@ -1074,7 +1050,7 @@ "## -of GTiff: geotiff we want to clip - change extent\n", "# e.g. 
-of GTiff \n", "\n", - "clipped_output = '../../datasets/processed/cotton_HarvestedAreaFraction_clipped.tif'\n", + "clipped_output = \"../../datasets/processed/cotton_HarvestedAreaFraction_clipped.tif\"\n", "\n", "!gdal_translate -projwin 100.0245300 2.9189000 103.8144300 -1.1221800 -of GTiff $ha_fraction_path $clipped_output" ] @@ -1152,10 +1128,7 @@ ], "source": [ "src = rio.open(clipped_output)\n", - "show_hist(\n", - " src,\n", - " bins=50, lw=0.0, stacked=False, alpha=0.3,\n", - " histtype='stepfilled', title=\"Histogram\")\n" + "show_hist(src, bins=50, lw=0.0, stacked=False, alpha=0.3, histtype=\"stepfilled\", title=\"Histogram\")" ] }, { @@ -1180,10 +1153,10 @@ " image_array = src.read(1)\n", " meta = src.meta\n", " profile = src.profile\n", - " \n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " rio.plot.show(image_array, vmin=0, vmax=4e-4, cmap='Blues', ax=ax, transform=src.transform)\n", - " ax.set_title('Clipped f')" + "\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " rio.plot.show(image_array, vmin=0, vmax=4e-4, cmap=\"Blues\", ax=ax, transform=src.transform)\n", + " ax.set_title(\"Clipped f\")" ] }, { @@ -1238,7 +1211,7 @@ } ], "source": [ - "resampled_output = '../../datasets/processed/cotton_HarvestedAreaFraction_resampled_30m.tif'\n", + "resampled_output = \"../../datasets/processed/cotton_HarvestedAreaFraction_resampled_30m.tif\"\n", "!gdalwarp -tr 0.000269494417976 0.000269495165055 -r near -of GTiff $clipped_output $resampled_output" ] }, @@ -1320,10 +1293,10 @@ " image_array = src.read(1)\n", " meta = src.meta\n", " profile = src.profile\n", - " \n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " rio.plot.show(image_array, vmin=0, vmax=4e-4, cmap='Blues', ax=ax, transform=src.transform)\n", - " ax.set_title('Resampled f')" + "\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " rio.plot.show(image_array, vmin=0, vmax=4e-4, cmap=\"Blues\", ax=ax, transform=src.transform)\n", + " ax.set_title(\"Resampled f\")" ] }, { @@ -1345,10 +1318,7 @@ ], "source": [ "src = rio.open(resampled_output)\n", - "show_hist(\n", - " src,\n", - " bins=50, lw=0.0, stacked=False, alpha=0.3,\n", - " histtype='stepfilled', title=\"Histogram\")\n" + "show_hist(src, bins=50, lw=0.0, stacked=False, alpha=0.3, histtype=\"stepfilled\", title=\"Histogram\")" ] }, { @@ -1396,8 +1366,8 @@ "source": [ "## Had to clip both to match\n", "\n", - "defor_clipped_output = '../../datasets/processed/defor_clipped.tif'\n", - "reclipped_output = '../../datasets/processed/cotton_HarvestedAreaFraction_resampled_clipped.tif'\n", + "defor_clipped_output = \"../../datasets/processed/defor_clipped.tif\"\n", + "reclipped_output = \"../../datasets/processed/cotton_HarvestedAreaFraction_resampled_clipped.tif\"\n", "\n", "!gdal_translate -projwin 100.0245300 2.9189000 103.8144300 -1.1221800 -of GTiff $defor_path $defor_clipped_output\n", "!gdal_translate -projwin 100.0245300 2.9189000 103.8144300 -1.1221800 -of GTiff $resampled_output $reclipped_output" @@ -1441,7 +1411,7 @@ } ], "source": [ - "input_path = '../../datasets/raw/satelligence_sample_data'\n", + "input_path = \"../../datasets/raw/satelligence_sample_data\"\n", "\n", "os.listdir(input_path)" ] @@ -1453,7 +1423,7 @@ "metadata": {}, "outputs": [], "source": [ - "aoi_path = input_path + '/IND_Riau_outline.shp'" + "aoi_path = input_path + \"/IND_Riau_outline.shp\"" ] }, { @@ -1553,10 +1523,10 @@ } ], "source": [ - "area_path = '../../datasets/processed/idn_pixel_area.tif'\n", + "area_path = \"../../datasets/processed/idn_pixel_area.tif\"\n", 
"\n", "## Naive assumption that all pixels are constant area (ignore proj for now)\n", - "pixel_area = 30*30*1e-4\n", + "pixel_area = 30 * 30 * 1e-4\n", "pixel_area" ] }, @@ -1576,8 +1546,8 @@ } ], "source": [ - "##Create area raster \n", - "!gdal_rasterize -l IND_Riau_outline -burn $pixel_area -tr 0.000269494417976 0.000269495165055 -a_nodata 0.0 -te 100.0245300 -1.1221800 103.8144300 2.9189000 -ot Float32 -of GTiff $aoi_path $area_path\n" + "##Create area raster\n", + "!gdal_rasterize -l IND_Riau_outline -burn $pixel_area -tr 0.000269494417976 0.000269495165055 -a_nodata 0.0 -te 100.0245300 -1.1221800 103.8144300 2.9189000 -ot Float32 -of GTiff $aoi_path $area_path" ] }, { @@ -1602,10 +1572,10 @@ " image_array = src.read(1)\n", " meta = src.meta\n", " profile = src.profile\n", - " \n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " rio.plot.show(image_array, vmin=0, vmax=4e-4, cmap='Blues', ax=ax, transform=src.transform)\n", - " ax.set_title('Area')" + "\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " rio.plot.show(image_array, vmin=0, vmax=4e-4, cmap=\"Blues\", ax=ax, transform=src.transform)\n", + " ax.set_title(\"Area\")" ] }, { @@ -1619,12 +1589,12 @@ " hf_array = src.read(1)\n", " meta1 = src.meta\n", " profile1 = src.profile\n", - " \n", + "\n", "with rio.open(reclipped_output) as src:\n", " defor_array = src.read(1)\n", " meta2 = src.meta\n", " profile2 = src.profile\n", - " \n", + "\n", "with rio.open(area_path) as src:\n", " area_array = src.read(1)\n", " meta3 = src.meta\n", @@ -1727,10 +1697,10 @@ } ], "source": [ - "fig, ax = plt.subplots(figsize=[15,10])\n", - "rio.plot.show(risk_array, vmin=0, vmax=1e-5, cmap='Reds', ax=ax, transform=src.transform)\n", - "df.plot(ax=ax, alpha=0.3, color='#ffffff', edgecolor='black')\n", - "ax.set_title('Risk')" + "fig, ax = plt.subplots(figsize=[15, 10])\n", + "rio.plot.show(risk_array, vmin=0, vmax=1e-5, cmap=\"Reds\", ax=ax, transform=src.transform)\n", + "df.plot(ax=ax, alpha=0.3, color=\"#ffffff\", edgecolor=\"black\")\n", + "ax.set_title(\"Risk\")" ] }, { @@ -1774,8 +1744,8 @@ ], "source": [ "show_hist(\n", - " risk_array, bins=50, lw=0.0, stacked=False, alpha=0.3,\n", - " histtype='stepfilled', title=\"Histogram\")\n" + " risk_array, bins=50, lw=0.0, stacked=False, alpha=0.3, histtype=\"stepfilled\", title=\"Histogram\"\n", + ")" ] }, { @@ -1787,9 +1757,9 @@ "source": [ "## Save as rio dataset\n", "\n", - "with rasterio.open(output_path + 'deforestation_risk_ha_2018.tif', \"w\", **profile2) as dest:\n", + "with rasterio.open(output_path + \"deforestation_risk_ha_2018.tif\", \"w\", **profile2) as dest:\n", " dest.write(risk_array, 1)\n", - " \n", + "\n", "risk_array = None" ] }, @@ -1838,7 +1808,7 @@ "\n", "## Generate f/y (raster)\n", "\n", - "## Generate V/At (raster) \n", + "## Generate V/At (raster)\n", "\n", "## Generate f' (raster) = V/At * f/y" ] @@ -1867,25 +1837,27 @@ "outputs": [], "source": [ "# generate h3 for india file\n", + "\n", + "\n", "def generate_h3_features(geometry, res):\n", " \"\"\"\n", " Generate h3 for geometry\n", - " \n", + "\n", " Input\n", " ------\n", " geometry: shapely.polygon or shapely.multipolygon\n", - " \n", + "\n", " Output\n", " ------\n", " gdf with H3_hexes\n", " \"\"\"\n", " # Create an empty dataframe to write data into\n", - " h3_df = pd.DataFrame([],columns=['h3_id'])\n", - " if geometry.geom_type == 'MultiPolygon':\n", + " pd.DataFrame([], columns=[\"h3_id\"])\n", + " if geometry.geom_type == \"MultiPolygon\":\n", " district_polygon = list(geometry)\n", " for polygon in 
district_polygon:\n", " poly_geojson = gpd.GeoSeries([polygon]).__geo_interface__\n", - " poly_geojson = poly_geojson['features'][0]['geometry'] \n", + " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n", " h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n", " for h3_hex in h3_hexes:\n", " coords = h3.h3_set_to_multi_polygon([h3_hex], geo_json=True)\n", @@ -1894,9 +1866,9 @@ " \"properties\": {\"hexid\": h3_hex},\n", " \"geometry\": {\"type\": \"Polygon\", \"coordinates\": coords[0]},\n", " }\n", - " elif geometry.geom_type == 'Polygon':\n", + " elif geometry.geom_type == \"Polygon\":\n", " poly_geojson = gpd.GeoSeries(geometry).__geo_interface__\n", - " poly_geojson = poly_geojson['features'][0]['geometry']\n", + " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n", " h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n", " for h3_hex in h3_hexes:\n", " coords = h3.h3_set_to_multi_polygon([h3_hex], geo_json=True)\n", @@ -1906,7 +1878,7 @@ " \"geometry\": {\"type\": \"Polygon\", \"coordinates\": coords[0]},\n", " }\n", " else:\n", - " print('Shape is not a polygon or multypolygon.')\n" + " print(\"Shape is not a polygon or multypolygon.\")" ] }, { @@ -1924,9 +1896,9 @@ } ], "source": [ - "#time spend in generating the features in h3 for the basins test in resolution 1\n", + "# time spend in generating the features in h3 for the basins test in resolution 1\n", "start_time = time.time()\n", - "h3_idn_res6 = [generate_h3_features(poly, 6) for poly in df['geometry']]\n", + "h3_idn_res6 = [generate_h3_features(poly, 6) for poly in df[\"geometry\"]]\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1945,20 +1917,18 @@ } ], "source": [ - "## zonal statistics with harvest area protion to calculate distribution \n", - "#summary statistics world main basins\n", + "## zonal statistics with harvest area protion to calculate distribution\n", + "# summary statistics world main basins\n", "start_time = time.time()\n", "\n", - "summ_stats_h3_idn = [gen_zonal_stats(\n", - " generator,\n", - " risk_path,\n", - " stats=\"sum\",\n", - " prefix=\"m_\",\n", - " geojson_out=True,\n", - " all_touched=True\n", - " ) for generator in h3_idn_res6]\n", - " \n", - " \n", + "summ_stats_h3_idn = [\n", + " gen_zonal_stats(\n", + " generator, risk_path, stats=\"sum\", prefix=\"m_\", geojson_out=True, all_touched=True\n", + " )\n", + " for generator in h3_idn_res6\n", + "]\n", + "\n", + "\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1986,14 +1956,15 @@ ], "source": [ "## generate datafram\n", - "#generate a dataframe with the elements\n", + "# generate a dataframe with the elements\n", "start_time = time.time()\n", - "h3_gdf_idn_res6 = pd.DataFrame([],columns=['h3_id', 'area_fraction'])\n", + "h3_gdf_idn_res6 = pd.DataFrame([], columns=[\"h3_id\", \"area_fraction\"])\n", "for generator in summ_stats_h3_idn:\n", " for feature in generator:\n", - " h3_gdf_idn_res6.loc[len(h3_gdf_idn_res6)]=[\n", - " feature['properties']['hexid'],\n", - " feature['properties']['m_sum']]\n", + " h3_gdf_idn_res6.loc[len(h3_gdf_idn_res6)] = [\n", + " feature[\"properties\"][\"hexid\"],\n", + " feature[\"properties\"][\"m_sum\"],\n", + " ]\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -2104,10 +2075,10 @@ } ], "source": [ - "#remove nans\n", - "h3_gdf_idn_res6['Volume'] = 25\n", + "# remove nans\n", + "h3_gdf_idn_res6[\"Volume\"] = 25\n", "h3_gdf_idn_res6 = h3_gdf_idn_res6.dropna()\n", - "h3_gdf_idn_res6['total_af'] = 
sum(list(h3_gdf_idn_res6['area_fraction']))\n", + "h3_gdf_idn_res6[\"total_af\"] = sum(list(h3_gdf_idn_res6[\"area_fraction\"]))\n", "h3_gdf_idn_res6.head()" ] }, @@ -2220,10 +2191,10 @@ "source": [ "## calculate asignation fraction as area fraction / total_af\n", "for i, row in h3_gdf_idn_res6.iterrows():\n", - " assignation_factor = row['area_fraction']/ row['total_af']\n", - " distributed_volume = row['Volume']*assignation_factor\n", - " h3_gdf_idn_res6.loc[i, 'assignation_factor'] = assignation_factor\n", - " h3_gdf_idn_res6.loc[i, 'distributed_vol'] = distributed_volume\n", + " assignation_factor = row[\"area_fraction\"] / row[\"total_af\"]\n", + " distributed_volume = row[\"Volume\"] * assignation_factor\n", + " h3_gdf_idn_res6.loc[i, \"assignation_factor\"] = assignation_factor\n", + " h3_gdf_idn_res6.loc[i, \"distributed_vol\"] = distributed_volume\n", "\n", "h3_gdf_idn_res6.head()" ] @@ -2248,8 +2219,12 @@ "metadata": {}, "outputs": [], "source": [ - "deforestation_area = '../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018_new_extent.tif'\n", - "probability_purchase_area = '../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif'" + "deforestation_area = (\n", + " \"../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018_new_extent.tif\"\n", + ")\n", + "probability_purchase_area = (\n", + " \"../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif\"\n", + ")" ] }, { @@ -2317,9 +2292,9 @@ } ], "source": [ - "#reproject deforestation from epsg4326 to epsg3857\n", - "#reproject biodiversity risk map from epsg4326 to epsg3857\n", - "#reproject raster volume from epsg4326 to espg3857\n", + "# reproject deforestation from epsg4326 to epsg3857\n", + "# reproject biodiversity risk map from epsg4326 to epsg3857\n", + "# reproject raster volume from epsg4326 to espg3857\n", "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 30 30 -r near -of GTiff '../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018_new_extent.tif' '../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018_new_extent_3857.tif'" ] }, @@ -2366,7 +2341,7 @@ } ], "source": [ - "#calculate deforestation impact metric\n", + "# calculate deforestation impact metric\n", "!gdal_calc.py -A '../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018_new_extent_3857.tif' -B $probability_purchase_area --outfile='../../datasets/processed/deforestation_indicators/deforestation_metric_ha_2018_3857_test_location.tif' --calc=\"A*B\"" ] }, @@ -2478,13 +2453,22 @@ } ], "source": [ - "with rio.open('../../datasets/processed/deforestation_indicators/deforestation_metric_ha_2018_3857_test_location.tif') as src:\n", + "with rio.open(\n", + " \"../../datasets/processed/deforestation_indicators/deforestation_metric_ha_2018_3857_test_location.tif\"\n", + ") as src:\n", " image_array = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((-111328.8286,334111.1714))\n", - " ax.set_xlim((1.113195e+07,1.154940e+07))\n", - " rio.plot.show(image_array, vmin=0, vmax=2.8455618937551e-12, cmap='Oranges', ax=ax, transform=src.transform)\n", - " ax.set_title('Deforestation impact - indonesia test')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((-111328.8286, 334111.1714))\n", + " ax.set_xlim((1.113195e07, 1.154940e07))\n", + " rio.plot.show(\n", + " image_array,\n", + " vmin=0,\n", + " vmax=2.8455618937551e-12,\n", + " 
cmap=\"Oranges\",\n", + " ax=ax,\n", + " transform=src.transform,\n", + " )\n", + " ax.set_title(\"Deforestation impact - indonesia test\")" ] }, { diff --git a/data/notebooks/Lab/3_mask_forest_loss_lesiv.ipynb b/data/notebooks/Lab/3_mask_forest_loss_lesiv.ipynb index 525b4a63c..ead957de4 100644 --- a/data/notebooks/Lab/3_mask_forest_loss_lesiv.ipynb +++ b/data/notebooks/Lab/3_mask_forest_loss_lesiv.ipynb @@ -54,22 +54,22 @@ "outputs": [], "source": [ "# insert code here\n", - "import rasterio\n", - "from rasterio.plot import show, plotting_extent\n", - "from rasterio.warp import calculate_default_transform, reproject, Resampling\n", - "from rasterio.mask import mask\n", - "from rasterio import Affine\n", - "import numpy as np\n", + "import math\n", + "from dataclasses import dataclass\n", + "from typing import List\n", + "\n", + "import geopandas as gpd\n", "import matplotlib.pyplot as plt\n", "import matplotlib.ticker as ticker\n", - "import geopandas as gpd\n", + "import numpy as np\n", "import pandas as pd\n", + "import rasterio\n", "import rasterstats\n", "import shapely\n", - "import math\n", - "\n", - "from dataclasses import dataclass\n", - "from typing import List, Optional" + "from rasterio import Affine\n", + "from rasterio.mask import mask\n", + "from rasterio.plot import plotting_extent\n", + "from rasterio.warp import Resampling, calculate_default_transform, reproject" ] }, { @@ -187,70 +187,80 @@ "metadata": {}, "outputs": [], "source": [ - "def plot_raster(raster, title, cmap='viridis'):\n", + "def plot_raster(raster, title, cmap=\"viridis\"):\n", " \"\"\"Plot a raster using matplotlib\"\"\"\n", " fig, ax = plt.subplots(figsize=(10, 10))\n", - " #show(raster, ax=ax, cmap=cmap, title=title)\n", + " # show(raster, ax=ax, cmap=cmap, title=title)\n", " ax.imshow(raster, cmap=cmap)\n", " ax.set_title(title)\n", " plt.show()\n", "\n", - "def plot_multi_raster(rasters, titles, extent_data = None, cmap='viridis'):\n", + "\n", + "def plot_multi_raster(rasters, titles, extent_data=None, cmap=\"viridis\"):\n", " \"\"\"Plot multiple rasters using matplotlib\"\"\"\n", " fig, axes = plt.subplots(1, len(rasters), figsize=(20, 20))\n", " for i, (raster, title) in enumerate(zip(rasters, titles)):\n", " pcm = axes[i].imshow(raster, cmap=cmap, extent=extent_data)\n", " cax = axes[i].inset_axes([1.04, 0, 0.02, 1])\n", - " axes[i].set_title(title, fontweight='bold')\n", + " axes[i].set_title(title, fontweight=\"bold\")\n", " fig.colorbar(pcm, ax=axes[i], shrink=0.6, cax=cax)\n", " plt.show()\n", "\n", - "def plot_multi_raster_with_gdf(rasters: List[RasterParams], titles, gdf, extent_data = None, cmap='viridis'):\n", + "\n", + "def plot_multi_raster_with_gdf(\n", + " rasters: List[RasterParams], titles, gdf, extent_data=None, cmap=\"viridis\"\n", + "):\n", " \"\"\"Plot multiple rasters using matplotlib\"\"\"\n", " fig, axes = plt.subplots(1, len(rasters), figsize=(20, 20))\n", " for i, (raster, title) in enumerate(zip(rasters, titles)):\n", - " pcm = axes[i].imshow(raster.data, cmap=cmap, extent=plotting_extent(raster.data, raster.transform))\n", + " pcm = axes[i].imshow(\n", + " raster.data, cmap=cmap, extent=plotting_extent(raster.data, raster.transform)\n", + " )\n", " cax = axes[i].inset_axes([1.04, 0, 0.02, 1])\n", - " axes[i].set_title(title, fontweight='bold')\n", + " axes[i].set_title(title, fontweight=\"bold\")\n", " fig.colorbar(pcm, ax=axes[i], shrink=0.6, cax=cax)\n", - " gdf.plot(ax=axes[i], facecolor='none', edgecolor='red', linewidth=2)\n", + " gdf.plot(ax=axes[i], 
facecolor=\"none\", edgecolor=\"red\", linewidth=2)\n", " plt.show()\n", "\n", + "\n", "def plot_raster_histogram(raster, title, bins=100):\n", " \"\"\"Plot a histogram of a raster\"\"\"\n", " fig, ax = plt.subplots(figsize=(10, 5))\n", " ax.hist(raster.flatten(), bins=bins)\n", - " ax.set_title(title, fontweight='bold')\n", + " ax.set_title(title, fontweight=\"bold\")\n", " plt.show()\n", "\n", + "\n", "def plot_multi_raster_histogram(rasters, titles, bins=100):\n", " \"\"\"Plot multiple histograms of a raster\"\"\"\n", - " fig, axes = plt.subplots(1, len(rasters), figsize=(20, 5) )\n", + " fig, axes = plt.subplots(1, len(rasters), figsize=(20, 5))\n", " for i, (raster, title) in enumerate(zip(rasters, titles)):\n", " axes[i].hist(raster.flatten(), bins=bins)\n", - " axes[i].set_title(title, fontweight='bold')\n", - " \n", + " axes[i].set_title(title, fontweight=\"bold\")\n", + "\n", " plt.show()\n", "\n", + "\n", "def year_formatter(x, pos):\n", " \"\"\"Format the x axis of a plot to show years\"\"\"\n", - " return f'{int(x+2000)}'\n", + " return f\"{int(x+2000)}\"\n", + "\n", "\n", "def plot_multi_loss_bar(rasters, titles, formater=year_formatter):\n", " \"\"\"Plot multiple histograms of a raster\"\"\"\n", - " fig, axes = plt.subplots(1, len(rasters), figsize=(20, 5),sharey=True)\n", + " fig, axes = plt.subplots(1, len(rasters), figsize=(20, 5), sharey=True)\n", " for i, (raster, title) in enumerate(zip(rasters, titles)):\n", " data = np.unique(raster.flatten(), return_counts=True)\n", " axes[i].bar(*data)\n", " axes[i].xaxis.set_major_formatter(formater)\n", " axes[i].xaxis.set_major_locator(ticker.AutoLocator())\n", - " axes[i].set_title(title, fontweight='bold')\n", + " axes[i].set_title(title, fontweight=\"bold\")\n", " plt.show()\n", "\n", + "\n", "def bbox_to_array(bbox):\n", " \"\"\"Convert a bounding box to a numpy array\"\"\"\n", - " return [bbox.left, bbox.bottom, bbox.right, bbox.top]\n", - "\n" + " return [bbox.left, bbox.bottom, bbox.right, bbox.top]" ] }, { @@ -259,8 +269,15 @@ "metadata": {}, "outputs": [], "source": [ - "def resample_raster(dataset, scale, window: rasterio.windows.Window=None, band=1, masked=True, resampling=rasterio.enums.Resampling.nearest):\n", - " \"\"\" Resample a raster\n", + "def resample_raster(\n", + " dataset,\n", + " scale,\n", + " window: rasterio.windows.Window = None,\n", + " band=1,\n", + " masked=True,\n", + " resampling=rasterio.enums.Resampling.nearest,\n", + "):\n", + " \"\"\"Resample a raster\n", " multiply the pixel size by the scale factor\n", " divide the dimensions by the scale factor\n", " i.e\n", @@ -284,99 +301,108 @@ " # rescale the metadata\n", " # scale image transform\n", " t = dataset.transform\n", - " transform = t * t.scale((1/scale), (1/scale))\n", + " transform = t * t.scale((1 / scale), (1 / scale))\n", " height = math.ceil((window.height if window else dataset.height) * scale)\n", " width = math.ceil((window.width if window else dataset.width) * scale)\n", "\n", " profile = dataset.profile.copy()\n", - " profile.update(transform=transform, driver='GTiff', height=height, width=width)\n", + " profile.update(transform=transform, driver=\"GTiff\", height=height, width=width)\n", "\n", " data = dataset.read(\n", - " band, window=window, masked=masked,\n", - " out_shape=(dataset.count, height, width),\n", - " resampling=resampling,\n", - " )\n", + " band,\n", + " window=window,\n", + " masked=masked,\n", + " out_shape=(dataset.count, height, width),\n", + " resampling=resampling,\n", + " )\n", "\n", " return data, 
profile\n", "\n", - "def resample_array(data: np.ndarray, profile, scale, band=1, masked=True, resampling=rasterio.enums.Resampling.nearest):\n", - " \"\"\" Resample a raster\n", - " multiply the pixel size by the scale factor\n", - " divide the dimensions by the scale factor\n", - " i.e\n", - " given a pixel size of 250m, dimensions of (1024, 1024) and a scale of 2,\n", - " the resampled raster would have an output pixel size of 500m and dimensions of (512, 512)\n", - " given a pixel size of 250m, dimensions of (1024, 1024) and a scale of 0.5,\n", - " the resampled raster would have an output pixel size of 125m and dimensions of (2048, 2048)\n", - " returns a DatasetReader instance from either a filesystem raster or MemoryFile (if out_path is None)\n", + "\n", + "def resample_array(\n", + " data: np.ndarray,\n", + " profile,\n", + " scale,\n", + " band=1,\n", + " masked=True,\n", + " resampling=rasterio.enums.Resampling.nearest,\n", + "):\n", + " \"\"\"Resample a raster\n", + " multiply the pixel size by the scale factor\n", + " divide the dimensions by the scale factor\n", + " i.e\n", + " given a pixel size of 250m, dimensions of (1024, 1024) and a scale of 2,\n", + " the resampled raster would have an output pixel size of 500m and dimensions of (512, 512)\n", + " given a pixel size of 250m, dimensions of (1024, 1024) and a scale of 0.5,\n", + " the resampled raster would have an output pixel size of 125m and dimensions of (2048, 2048)\n", + " returns a DatasetReader instance from either a filesystem raster or MemoryFile (if out_path is None)\n", " \"\"\"\n", " # rescale the metadata\n", " # scale image transform\n", - " t = profile.get('transform')\n", - " transform = t * t.scale((1/scale), (1/scale))\n", + " t = profile.get(\"transform\")\n", + " transform = t * t.scale((1 / scale), (1 / scale))\n", " print(transform)\n", - " height = int(profile.get('height') * scale)\n", - " width = int(profile.get('width') * scale)\n", + " height = int(profile.get(\"height\") * scale)\n", + " width = int(profile.get(\"width\") * scale)\n", " print(height, width)\n", " print(t.scale((scale), (scale)))\n", - " crs = profile.get('crs')\n", - " no_data = profile.get('nodata')\n", + " crs = profile.get(\"crs\")\n", + " no_data = profile.get(\"nodata\")\n", "\n", " new_profile = profile.copy()\n", "\n", - " new_data, new_affine = reproject( source=data,\n", - " destination=np.zeros((band, height, width), dtype=data.dtype),\n", - " src_transform=t,\n", - " src_crs=crs,\n", - " dst_crs=crs,\n", - " dst_nodata=no_data,\n", - " dst_transform=transform,\n", - " resampling=resampling)\n", - " \n", - " new_profile.update({\"driver\": \"GTiff\",\n", - " \"height\": height,\n", - " \"width\": width,\n", - " \"transform\": new_affine})\n", + " new_data, new_affine = reproject(\n", + " source=data,\n", + " destination=np.zeros((band, height, width), dtype=data.dtype),\n", + " src_transform=t,\n", + " src_crs=crs,\n", + " dst_crs=crs,\n", + " dst_nodata=no_data,\n", + " dst_transform=transform,\n", + " resampling=resampling,\n", + " )\n", + "\n", + " new_profile.update(\n", + " {\"driver\": \"GTiff\", \"height\": height, \"width\": width, \"transform\": new_affine}\n", + " )\n", + "\n", + " return new_data[band - 1], new_profile\n", "\n", - " return new_data[band-1], new_profile\n", "\n", "def corregister_raster(infile, match, outfile):\n", - " \"\"\"Reproject a file to match the shape and projection of existing raster. 
\n", - " \n", + " \"\"\"Reproject a file to match the shape and projection of existing raster.\n", + "\n", " Parameters\n", " ----------\n", " infile : (string) path to input file to reproject\n", - " match : (string) path to raster with desired shape and projection \n", + " match : (string) path to raster with desired shape and projection\n", " outfile : (string) path to output file tif\n", " \"\"\"\n", " # open input\n", " with rasterio.open(infile) as src:\n", " src_transform = src.transform\n", - " \n", + "\n", " # open input to match\n", " with rasterio.open(match) as match:\n", " dst_crs = match.crs\n", - " \n", + "\n", " # calculate the output transform matrix\n", " dst_transform, dst_width, dst_height = calculate_default_transform(\n", - " src.crs, # input CRS\n", - " dst_crs, # output CRS\n", - " match.width, # input width\n", - " match.height, # input height \n", + " src.crs, # input CRS\n", + " dst_crs, # output CRS\n", + " match.width, # input width\n", + " match.height, # input height\n", " *match.bounds, # unpacks input outer boundaries (left, bottom, right, top)\n", " )\n", "\n", " # set properties for output\n", " kwargs = src.meta.copy()\n", - " kwargs.update({\n", - " 'crs': dst_crs,\n", - " 'transform': dst_transform,\n", - " 'width': dst_width,\n", - " 'height': dst_height\n", - " })\n", + " kwargs.update(\n", + " {\"crs\": dst_crs, \"transform\": dst_transform, \"width\": dst_width, \"height\": dst_height}\n", + " )\n", "\n", " # reproject the data\n", - " with rasterio.open(outfile, 'w', **kwargs) as dst:\n", + " with rasterio.open(outfile, \"w\", **kwargs) as dst:\n", " for i in range(1, src.count + 1):\n", " reproject(\n", " source=rasterio.band(src, i),\n", @@ -385,7 +411,9 @@ " src_crs=src.crs,\n", " dst_transform=dst_transform,\n", " dst_crs=dst_crs,\n", - " resampling=Resampling.nearest)\n", + " resampling=Resampling.nearest,\n", + " )\n", + "\n", "\n", "def save_raster(data, profile, out_path):\n", " \"\"\"Save a raster to disk\n", @@ -395,7 +423,7 @@ " profile (dict): rasterio profile\n", " out_path (str): output path\n", " \"\"\"\n", - " with rasterio.open(out_path, 'w', **profile) as dst:\n", + " with rasterio.open(out_path, \"w\", **profile) as dst:\n", " dst.write(data, 1)" ] }, @@ -406,15 +434,15 @@ "outputs": [], "source": [ "# Read the forest loss file\n", - "loss_data = rasterio.open('./Hansen_GFC-2021-v1.9_lossyear_00N_060W.tif', \"r+\")\n", - "manage_data = rasterio.open('./FML_v3-2_with-colorbar.tif', \"r+\")\n", + "loss_data = rasterio.open(\"./Hansen_GFC-2021-v1.9_lossyear_00N_060W.tif\", \"r+\")\n", + "manage_data = rasterio.open(\"./FML_v3-2_with-colorbar.tif\", \"r+\")\n", "\n", "# Get the bounding box of the loss raster as is the smallest one.\n", "bbox_loss = bbox_to_array(loss_data.bounds)\n", "polygon = [shapely.geometry.box(*bbox_loss, ccw=True)]\n", "\n", "# Read the shapefile and clip it with the raster bounding box\n", - "aois = gpd.read_file('./layers/POLYGON.shp').clip(polygon[0])\n", + "aois = gpd.read_file(\"./layers/POLYGON.shp\").clip(polygon[0])\n", "\n", "# Get the coordinates of the shapefile\n", "coords = aois.bounds\n", @@ -461,16 +489,22 @@ ], "source": [ "# lets first plot the data focus on the areas of interest\n", - "test_loss, affine_loss = mask(loss_data, aois.envelope, indexes=1, crop=True, all_touched=True, nodata=0)\n", - "test_mask, affine_mask = mask(manage_data, aois.envelope, indexes=1, crop=True, all_touched=True, nodata=-128)\n", + "test_loss, affine_loss = mask(\n", + " loss_data, aois.envelope, indexes=1, 
crop=True, all_touched=True, nodata=0\n", + ")\n", + "test_mask, affine_mask = mask(\n", + " manage_data, aois.envelope, indexes=1, crop=True, all_touched=True, nodata=-128\n", + ")\n", "print(test_loss.shape, test_mask.shape)\n", - "plot_multi_raster_with_gdf([\n", - " RasterParams(test_loss, affine_loss, loss_data.crs), \n", - " RasterParams(test_mask,affine_mask, loss_data.crs)\n", - " ],\n", - " ['Forest Loss', 'Managed Forest'], \n", - " aois)\n", - "plot_multi_raster_histogram([test_loss, test_mask], ['Forest Loss', 'Managed Forest'])" + "plot_multi_raster_with_gdf(\n", + " [\n", + " RasterParams(test_loss, affine_loss, loss_data.crs),\n", + " RasterParams(test_mask, affine_mask, loss_data.crs),\n", + " ],\n", + " [\"Forest Loss\", \"Managed Forest\"],\n", + " aois,\n", + ")\n", + "plot_multi_raster_histogram([test_loss, test_mask], [\"Forest Loss\", \"Managed Forest\"])" ] }, { @@ -481,17 +515,21 @@ "source": [ "# we will need upsampling first the managed forest raster to the same resolution as the loss raster\n", "mask_meta = manage_data.meta.copy()\n", - "mask_meta.update({\"driver\": \"GTiff\",\n", - " \"height\": test_mask.shape[0],\n", - " \"width\": test_mask.shape[1],\n", - " \"transform\": affine_mask})\n", + "mask_meta.update(\n", + " {\n", + " \"driver\": \"GTiff\",\n", + " \"height\": test_mask.shape[0],\n", + " \"width\": test_mask.shape[1],\n", + " \"transform\": affine_mask,\n", + " }\n", + ")\n", "# get the resolution of the loss raster\n", "loss_res = loss_data.res\n", "# get the resolution of the managed forest raster\n", "manage_res = manage_data.res\n", "# calculate the upsampling factor\n", - "upscale_factor_x = manage_res[0]/loss_res[0]\n", - "upscale_factor_y = manage_res[1]/loss_res[1]\n", + "upscale_factor_x = manage_res[0] / loss_res[0]\n", + "upscale_factor_y = manage_res[1] / loss_res[1]\n", "mask_window = manage_data.window(*coords.values[0])" ] }, @@ -502,7 +540,9 @@ "outputs": [], "source": [ "# resample the managed forest raster\n", - "mask_resampled, mask_profile = resample_raster(manage_data, upscale_factor_x, window=mask_window, masked=True)" + "mask_resampled, mask_profile = resample_raster(\n", + " manage_data, upscale_factor_x, window=mask_window, masked=True\n", + ")" ] }, { @@ -530,7 +570,14 @@ ], "source": [ "print(test_loss.shape, mask_resampled.shape)\n", - "plot_multi_raster([np.ma.masked_where(test_loss== 0, test_loss, copy=True), np.ma.masked_where(mask_resampled < 12, mask_resampled, copy=True)],['Forest Loss', 'Managed Forest'], cmap='rainbow')" + "plot_multi_raster(\n", + " [\n", + " np.ma.masked_where(test_loss == 0, test_loss, copy=True),\n", + " np.ma.masked_where(mask_resampled < 12, mask_resampled, copy=True),\n", + " ],\n", + " [\"Forest Loss\", \"Managed Forest\"],\n", + " cmap=\"rainbow\",\n", + ")" ] }, { @@ -564,8 +611,10 @@ "forest_loss = np.ma.masked_where(test_loss == 0, test_loss, copy=True)\n", "masked_loss = np.ma.masked_where(mask_resampled > 11, forest_loss, copy=True)\n", "\n", - "plot_multi_raster([forest_loss, masked_loss],['Forest Loss', 'Masked Managed Forest'])\n", - "plot_multi_loss_bar([forest_loss.compressed(), masked_loss.compressed()], ['Forest Loss', 'Masked Managed Forest'])" + "plot_multi_raster([forest_loss, masked_loss], [\"Forest Loss\", \"Masked Managed Forest\"])\n", + "plot_multi_loss_bar(\n", + " [forest_loss.compressed(), masked_loss.compressed()], [\"Forest Loss\", \"Masked Managed Forest\"]\n", + ")" ] }, { @@ -583,17 +632,20 @@ "outputs": [], "source": [ "profile_loss = loss_data.profile\n", - 
"profile_loss.update({\"driver\": \"GTiff\",\n", - " \"height\": masked_loss.shape[0],\n", - " \"width\": masked_loss.shape[1],\n", - " \"transform\": affine_loss})\n", + "profile_loss.update(\n", + " {\n", + " \"driver\": \"GTiff\",\n", + " \"height\": masked_loss.shape[0],\n", + " \"width\": masked_loss.shape[1],\n", + " \"transform\": affine_loss,\n", + " }\n", + ")\n", "# TODO: the transform is not correct in the mask profile (to be review why) so we need to update it\n", - "mask_profile.update({\"driver\": \"GTiff\",\n", - " \"transform\": affine_loss})\n", + "mask_profile.update({\"driver\": \"GTiff\", \"transform\": affine_loss})\n", "\n", - "save_raster(masked_loss, profile_loss, './masked_loss.tif')\n", - "save_raster(forest_loss, profile_loss, './forest_loss.tif')\n", - "save_raster(mask_resampled, mask_profile, './mask_resampled.tif')" + "save_raster(masked_loss, profile_loss, \"./masked_loss.tif\")\n", + "save_raster(forest_loss, profile_loss, \"./forest_loss.tif\")\n", + "save_raster(mask_resampled, mask_profile, \"./mask_resampled.tif\")" ] }, { @@ -652,8 +704,15 @@ } ], "source": [ - "reduction_in_deforested = round((len(forest_loss.compressed())- len(masked_loss.compressed()))/len(forest_loss.compressed()),3)*100\n", - "print(f'The reduction in deforestation is {reduction_in_deforested}%')" + "reduction_in_deforested = (\n", + " round(\n", + " (len(forest_loss.compressed()) - len(masked_loss.compressed()))\n", + " / len(forest_loss.compressed()),\n", + " 3,\n", + " )\n", + " * 100\n", + ")\n", + "print(f\"The reduction in deforestation is {reduction_in_deforested}%\")" ] }, { @@ -758,15 +817,18 @@ } ], "source": [ - "\n", - "loss_aois = rasterstats.zonal_stats(aois, forest_loss.filled(0), affine=affine_loss, stats=['count'], nodata=0)\n", - "loss_aois_masked = rasterstats.zonal_stats(aois, masked_loss.filled(0), affine=affine_loss, stats=['count'], nodata=0)\n", - "\n", - "areas =[]\n", + "loss_aois = rasterstats.zonal_stats(\n", + " aois, forest_loss.filled(0), affine=affine_loss, stats=[\"count\"], nodata=0\n", + ")\n", + "loss_aois_masked = rasterstats.zonal_stats(\n", + " aois, masked_loss.filled(0), affine=affine_loss, stats=[\"count\"], nodata=0\n", + ")\n", + "\n", + "areas = []\n", "for data_loss_data, data_loss_masked in zip(loss_aois, loss_aois_masked):\n", " data_loss = {}\n", - " data_loss['difference'] = data_loss_data['count'] - data_loss_masked['count']\n", - " data_loss['reduction'] = round((data_loss['difference']/data_loss_data['count'])*100,2)\n", + " data_loss[\"difference\"] = data_loss_data[\"count\"] - data_loss_masked[\"count\"]\n", + " data_loss[\"reduction\"] = round((data_loss[\"difference\"] / data_loss_data[\"count\"]) * 100, 2)\n", " areas.append(data_loss)\n", "\n", "pd.DataFrame(areas).describe()" diff --git a/data/notebooks/Lab/4_carbon_indicator_v2.ipynb b/data/notebooks/Lab/4_carbon_indicator_v2.ipynb index 152286e04..cb22c5775 100644 --- a/data/notebooks/Lab/4_carbon_indicator_v2.ipynb +++ b/data/notebooks/Lab/4_carbon_indicator_v2.ipynb @@ -60,15 +60,14 @@ "outputs": [], "source": [ "# import lib\n", - "import os\n", "import io\n", - "import requests\n", + "import os\n", "import zipfile\n", "\n", + "import matplotlib.pyplot as plt\n", "import rasterio as rio\n", "import rasterio.plot\n", - "import matplotlib.pyplot as plt\n", - "from rasterio.plot import show_hist" + "import requests" ] }, { @@ -112,8 +111,10 @@ } ], "source": [ - "print('Net flux:')\n", - "filepath = 
'https://tiles.globalforestwatch.org/gfw_forest_carbon_net_flux/v20210331/tcd_30/4/12/7.png'\n", + "print(\"Net flux:\")\n", + "filepath = (\n", + " \"https://tiles.globalforestwatch.org/gfw_forest_carbon_net_flux/v20210331/tcd_30/4/12/7.png\"\n", + ")\n", "with rio.open(filepath) as src:\n", " print(src.profile)" ] @@ -138,15 +139,17 @@ } ], "source": [ - "#check calculated risk map\n", - "with rio.open( 'https://tiles.globalforestwatch.org/gfw_forest_carbon_net_flux/v20210331/tcd_30/4/12/7.png') as src:\n", + "# check calculated risk map\n", + "with rio.open(\n", + " \"https://tiles.globalforestwatch.org/gfw_forest_carbon_net_flux/v20210331/tcd_30/4/12/7.png\"\n", + ") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " #ax.set_ylim((-5,40))\n", - " #ax.set_xlim((60,100))\n", - " rio.plot.show(dat, vmin=-10, vmax=350, cmap='YlGnBu', ax=ax, transform=src.transform)\n", - " #gdf_india.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n", - " ax.set_title('Carbon net flux tile')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " # ax.set_ylim((-5,40))\n", + " # ax.set_xlim((60,100))\n", + " rio.plot.show(dat, vmin=-10, vmax=350, cmap=\"YlGnBu\", ax=ax, transform=src.transform)\n", + " # gdf_india.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n", + " ax.set_title(\"Carbon net flux tile\")" ] }, { @@ -175,16 +178,16 @@ ], "source": [ "# download carbon emissions dataset from earthstat\n", - "url = 'https://s3.us-east-2.amazonaws.com/earthstatdata/GHGEmissions_Geotiff.zip'\n", - "local_path = '../../datasets/raw/carbon_indicators'\n", + "url = \"https://s3.us-east-2.amazonaws.com/earthstatdata/GHGEmissions_Geotiff.zip\"\n", + "local_path = \"../../datasets/raw/carbon_indicators\"\n", "if not os.path.isdir(local_path):\n", " os.mkdir(local_path)\n", - "print('Downloading dataset...')\n", + "print(\"Downloading dataset...\")\n", "r = requests.get(url)\n", "z = zipfile.ZipFile(io.BytesIO(r.content))\n", "print(\"Done\")\n", - "z.extractall(path=local_path) # extract to folder\n", - "filenames = [y for y in sorted(z.namelist()) for ending in ['tif'] if y.endswith(ending)] \n", + "z.extractall(path=local_path) # extract to folder\n", + "filenames = [y for y in sorted(z.namelist()) for ending in [\"tif\"] if y.endswith(ending)]\n", "print(filenames)" ] }, @@ -263,13 +266,13 @@ ], "source": [ "## visualize raster with rasterio\n", - "with rio.open( local_path + '/GHGEmissions_Geotiff/total_emissions.tif') as src:\n", + "with rio.open(local_path + \"/GHGEmissions_Geotiff/total_emissions.tif\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " #ax.set_ylim((-5,40))\n", - " #ax.set_xlim((60,100))\n", - " rio.plot.show(dat, vmin=0, vmax=444, cmap='Greens', ax=ax, transform=src.transform)\n", - " ax.set_title('Total Carbon Emissions from cropland - earthstat')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " # ax.set_ylim((-5,40))\n", + " # ax.set_xlim((60,100))\n", + " rio.plot.show(dat, vmin=0, vmax=444, cmap=\"Greens\", ax=ax, transform=src.transform)\n", + " ax.set_title(\"Total Carbon Emissions from cropland - earthstat\")" ] }, { @@ -361,8 +364,8 @@ } ], "source": [ - "deforestation_path = '../../datasets/processed/deforestation_indicators'\n", - "!gdalinfo -stats -hist $deforestation_path/'deforestation_risk_ha_2018.tif'\n" + "deforestation_path = \"../../datasets/processed/deforestation_indicators\"\n", + "!gdalinfo -stats -hist $deforestation_path/'deforestation_risk_ha_2018.tif'" ] }, { @@ 
-386,13 +389,20 @@ ], "source": [ "## visualize raster with rasterio\n", - "with rio.open( deforestation_path + '/deforestation_risk_ha_2018.tif') as src:\n", + "with rio.open(deforestation_path + \"/deforestation_risk_ha_2018.tif\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((-1.1219285,2.9191515))\n", - " ax.set_xlim((100.0245240,103.8144240))\n", - " rio.plot.show(dat, vmin=1.9673679705447e-07, vmax=3.3993426768575e-05, cmap='Oranges', ax=ax, transform=src.transform)\n", - " ax.set_title('Deforestation in test location with satelligence data')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((-1.1219285, 2.9191515))\n", + " ax.set_xlim((100.0245240, 103.8144240))\n", + " rio.plot.show(\n", + " dat,\n", + " vmin=1.9673679705447e-07,\n", + " vmax=3.3993426768575e-05,\n", + " cmap=\"Oranges\",\n", + " ax=ax,\n", + " transform=src.transform,\n", + " )\n", + " ax.set_title(\"Deforestation in test location with satelligence data\")" ] }, { @@ -454,7 +464,7 @@ } ], "source": [ - "#get the total emissions by area unit\n", + "# get the total emissions by area unit\n", "!gdal_calc.py -A '../../datasets/raw/carbon_indicators/GHGEmissions_Geotiff/total_emissions_clipped_4326.tif' --outfile='../../datasets/raw/carbon_indicators/GHGEmissions_Geotiff/total_emissions_clipped_4326_byArea.tif' --calc=\"A*(0.083333333333329*0.083333333333329)\"" ] }, @@ -523,7 +533,7 @@ } ], "source": [ - "#normalise the downsample values by the pixel area\n", + "# normalise the downsample values by the pixel area\n", "!gdal_calc.py -A '../../datasets/raw/carbon_indicators/GHGEmissions_Geotiff/total_emissions_clipped_4326_byArea_30m.tif' --outfile='../../datasets/raw/carbon_indicators/GHGEmissions_Geotiff/total_emissions_clipped_4326_30m.tif' --calc=\"A/(0.000269494417976*0.000269494417976)\"" ] }, @@ -641,7 +651,7 @@ } ], "source": [ - "#calculate the ri\n", + "# calculate the ri\n", "!gdal_calc.py -A '../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018_new_xxtent_bio.tif' -B '../../datasets/raw/carbon_indicators/GHGEmissions_Geotiff/total_emissions_clipped_4326_30m.tif' --outfile='../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg4326_tCO2e_v2.tif' --calc=\"A*(B)\"" ] }, @@ -726,13 +736,15 @@ ], "source": [ "## visualize raster with rasterio\n", - "with rio.open('../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg4326_tCO2e_v2.tif') as src:\n", + "with rio.open(\n", + " \"../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg4326_tCO2e_v2.tif\"\n", + ") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((-1.1219285,2.9191515))\n", - " ax.set_xlim((100.0245240,103.8144240))\n", - " rio.plot.show(dat, vmin=0, vmax=822.80045408519, cmap='Oranges', ax=ax, transform=src.transform)\n", - " ax.set_title('Carbon emissions risk in cotton due to land use change tCO2')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((-1.1219285, 2.9191515))\n", + " ax.set_xlim((100.0245240, 103.8144240))\n", + " rio.plot.show(dat, vmin=0, vmax=822.80045408519, cmap=\"Oranges\", ax=ax, transform=src.transform)\n", + " ax.set_title(\"Carbon emissions risk in cotton due to land use change tCO2\")" ] }, { @@ -869,13 +881,17 @@ ], "source": [ "## visualize raster with rasterio\n", - "with rio.open('../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif') as src:\n", + "with rio.open(\n", + " 
\"../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif\"\n", + ") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((-111328.8286,334111.1714))\n", - " ax.set_xlim((1.113195e+07,1.154940e+07))\n", - " rio.plot.show(dat, vmin=0, vmax=3.2353862778438e-08, cmap='Oranges', ax=ax, transform=src.transform)\n", - " ax.set_title('Probability purchase area - test location')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((-111328.8286, 334111.1714))\n", + " ax.set_xlim((1.113195e07, 1.154940e07))\n", + " rio.plot.show(\n", + " dat, vmin=0, vmax=3.2353862778438e-08, cmap=\"Oranges\", ax=ax, transform=src.transform\n", + " )\n", + " ax.set_title(\"Probability purchase area - test location\")" ] }, { @@ -943,7 +959,7 @@ ], "source": [ "# reproject risk map from epsg4326 to epsg 3857\n", - "#get the total emissions by area unit\n", + "# get the total emissions by area unit\n", "!gdal_calc.py -A '../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg4326_tCO2e_v2.tif' --outfile='../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg4326_tCO2e_v2_byArea.tif' --calc=\"A*(0.000269494417976*0.000269494417976)\"" ] }, @@ -965,7 +981,7 @@ } ], "source": [ - "#reproject raster\n", + "# reproject raster\n", "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 30 30 -r near -of GTiff '../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg4326_tCO2e_v2_byArea.tif' '../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg3857_tCO2e_v2_byArea.tif'" ] }, @@ -1012,7 +1028,7 @@ } ], "source": [ - "#renormalise by pixel area\n", + "# renormalise by pixel area\n", "!gdal_calc.py -A '../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg3857_tCO2e_v2_byArea.tif' --outfile='../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg3857_tCO2e.tif' --calc=\"A/(30*30)\"" ] }, @@ -1101,7 +1117,7 @@ } ], "source": [ - "#check extendion \n", + "# check extendion\n", "!gdalinfo -stats -hist '../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg3857_tCO2e.tif'" ] }, @@ -1238,7 +1254,7 @@ ], "source": [ "## calculate impact metric by multiplying the probability area and the risk map\n", - "#calculate the ri\n", + "# calculate the ri\n", "!gdal_calc.py -A '../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif' -B '../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg3857_tCO2e.tif' --outfile='../../datasets/processed/carbon_indicators/carbon_metric_cotton_test_location_tCO2.tif' --calc=\"A*B\"" ] }, @@ -1351,13 +1367,17 @@ ], "source": [ "## visualize raster with rasterio\n", - "with rio.open('../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif') as src:\n", + "with rio.open(\n", + " \"../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif\"\n", + ") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((-111328.8286,334111.1714))\n", - " ax.set_xlim((1.113195e+07,1.154940e+07))\n", - " rio.plot.show(dat, vmin=0, vmax=6.0517936148474e-12, cmap='Oranges', ax=ax, transform=src.transform)\n", - " ax.set_title('Carbon emissions due to land use change in test location -tCO2')" + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((-111328.8286, 334111.1714))\n", + " ax.set_xlim((1.113195e07, 1.154940e07))\n", + " rio.plot.show(\n", + " dat, vmin=0, 
vmax=6.0517936148474e-12, cmap=\"Oranges\", ax=ax, transform=src.transform\n", + " )\n", + " ax.set_title(\"Carbon emissions due to land use change in test location -tCO2\")" ] }, { @@ -1378,13 +1398,13 @@ "metadata": {}, "outputs": [], "source": [ - "import h3\n", + "import time\n", + "\n", "import geopandas as gpd\n", + "import h3\n", "import pandas as pd\n", - "import time\n", "from rasterstats import gen_zonal_stats\n", - "from shapely.geometry import shape\n", - "import json" + "from shapely.geometry import shape" ] }, { @@ -1397,22 +1417,22 @@ "def generate_h3_features(geometry, res):\n", " \"\"\"\n", " Generate h3 for geometry\n", - " \n", + "\n", " Input\n", " ------\n", " geometry: shapely.polygon or shapely.multipolygon\n", - " \n", + "\n", " Output\n", " ------\n", " gdf with H3_hexes\n", " \"\"\"\n", " # Create an empty dataframe to write data into\n", - " h3_df = pd.DataFrame([],columns=['h3_id'])\n", - " if geometry.geom_type == 'MultiPolygon':\n", + " pd.DataFrame([], columns=[\"h3_id\"])\n", + " if geometry.geom_type == \"MultiPolygon\":\n", " district_polygon = list(geometry)\n", " for polygon in district_polygon:\n", " poly_geojson = gpd.GeoSeries([polygon]).__geo_interface__\n", - " poly_geojson = poly_geojson['features'][0]['geometry'] \n", + " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n", " h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n", " for h3_hex in h3_hexes:\n", " coords = h3.h3_set_to_multi_polygon([h3_hex], geo_json=True)\n", @@ -1421,9 +1441,9 @@ " \"properties\": {\"hexid\": h3_hex},\n", " \"geometry\": {\"type\": \"Polygon\", \"coordinates\": coords[0]},\n", " }\n", - " elif geometry.geom_type == 'Polygon':\n", + " elif geometry.geom_type == \"Polygon\":\n", " poly_geojson = gpd.GeoSeries(geometry).__geo_interface__\n", - " poly_geojson = poly_geojson['features'][0]['geometry']\n", + " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n", " h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n", " for h3_hex in h3_hexes:\n", " coords = h3.h3_set_to_multi_polygon([h3_hex], geo_json=True)\n", @@ -1433,7 +1453,7 @@ " \"geometry\": {\"type\": \"Polygon\", \"coordinates\": coords[0]},\n", " }\n", " else:\n", - " print('Shape is not a polygon or multypolygon.')" + " print(\"Shape is not a polygon or multypolygon.\")" ] }, { @@ -1510,7 +1530,7 @@ ], "source": [ "## import geometry to generate the h3 dataset - indonesia in our case\n", - "indonesia_gdf = gpd.read_file('../../datasets/raw/input_data_test/indonesia_test_shape_clip.shp')\n", + "indonesia_gdf = gpd.read_file(\"../../datasets/raw/input_data_test/indonesia_test_shape_clip.shp\")\n", "indonesia_gdf" ] }, @@ -1576,7 +1596,7 @@ } ], "source": [ - "geom = indonesia_gdf['geometry'][0]\n", + "geom = indonesia_gdf[\"geometry\"][0]\n", "geom" ] }, @@ -1599,7 +1619,7 @@ "# risk map\n", "start_time = time.time()\n", "hexbin_generator = generate_h3_features(geom, 7)\n", - "raster_path = '../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg4326_tCO2e_v2.tif'\n", + "raster_path = \"../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg4326_tCO2e_v2.tif\"\n", "\n", "raster_stats_h3 = gen_zonal_stats(\n", " hexbin_generator,\n", @@ -1607,10 +1627,9 @@ " stats=\"sum\",\n", " prefix=\"carbon_risk_cotton_\",\n", " geojson_out=True,\n", - " all_touched=True\n", + " all_touched=True,\n", ")\n", - "print(\"--- %s seconds ---\" % (time.time() - start_time))\n", - "\n" + "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, { @@ -1628,7 +1647,7 @@ } ], "source": 
[ - "#check output\n", + "# check output\n", "for feature in raster_stats_h3:\n", " print(feature)\n", " break" @@ -1657,15 +1676,15 @@ } ], "source": [ - "#generate a dataframe with the elements\n", + "# generate a dataframe with the elements\n", "start_time = time.time()\n", - "h3_gdf_carbon_risk = pd.DataFrame([],columns=['h3_id', 'carbon_risk_cotton_sum', 'geometry'])\n", + "h3_gdf_carbon_risk = pd.DataFrame([], columns=[\"h3_id\", \"carbon_risk_cotton_sum\", \"geometry\"])\n", "for feature in raster_stats_h3:\n", - " h3_gdf_carbon_risk.loc[len(h3_gdf_carbon_risk)]=[\n", - " feature['properties']['hexid'],\n", - " feature['properties']['carbon_risk_cotton_sum'],\n", - " shape(feature['geometry'])\n", - " ]\n", + " h3_gdf_carbon_risk.loc[len(h3_gdf_carbon_risk)] = [\n", + " feature[\"properties\"][\"hexid\"],\n", + " feature[\"properties\"][\"carbon_risk_cotton_sum\"],\n", + " shape(feature[\"geometry\"]),\n", + " ]\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1768,7 +1787,9 @@ "metadata": {}, "outputs": [], "source": [ - "h3_gdf_carbon_risk.to_csv('../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg4326_tCO2e_v2_h3.csv')" + "h3_gdf_carbon_risk.to_csv(\n", + " \"../../datasets/processed/carbon_indicators/carbon_risk_cotton_epsg4326_tCO2e_v2_h3.csv\"\n", + ")" ] }, { @@ -1792,7 +1813,9 @@ "# risk map\n", "start_time = time.time()\n", "hexbin_generator = generate_h3_features(geom, 7)\n", - "raster_path ='../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_4326.tif'\n", + "raster_path = (\n", + " \"../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_4326.tif\"\n", + ")\n", "\n", "raster_stats_h3 = gen_zonal_stats(\n", " hexbin_generator,\n", @@ -1800,7 +1823,7 @@ " stats=\"max\",\n", " prefix=\"purchase_area_\",\n", " geojson_out=True,\n", - " all_touched=True\n", + " all_touched=True,\n", ")\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] @@ -1820,7 +1843,7 @@ } ], "source": [ - "#check output\n", + "# check output\n", "for feature in raster_stats_h3:\n", " print(feature)\n", " break" @@ -1841,15 +1864,15 @@ } ], "source": [ - "#generate a dataframe with the elements\n", + "# generate a dataframe with the elements\n", "start_time = time.time()\n", - "h3_gdf_purchase = pd.DataFrame([],columns=['h3_id', 'purchase_area_max', 'geometry'])\n", + "h3_gdf_purchase = pd.DataFrame([], columns=[\"h3_id\", \"purchase_area_max\", \"geometry\"])\n", "for feature in raster_stats_h3:\n", - " h3_gdf_purchase.loc[len(h3_gdf_purchase)]=[\n", - " feature['properties']['hexid'],\n", - " feature['properties']['purchase_area_max'],\n", - " shape(feature['geometry'])\n", - " ]\n", + " h3_gdf_purchase.loc[len(h3_gdf_purchase)] = [\n", + " feature[\"properties\"][\"hexid\"],\n", + " feature[\"properties\"][\"purchase_area_max\"],\n", + " shape(feature[\"geometry\"]),\n", + " ]\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -1860,7 +1883,9 @@ "metadata": {}, "outputs": [], "source": [ - "h3_gdf_purchase.to_csv('../../datasets/processed/carbon_indicators/probability_purchase_area_h3.csv')" + "h3_gdf_purchase.to_csv(\n", + " \"../../datasets/processed/carbon_indicators/probability_purchase_area_h3.csv\"\n", + ")" ] }, { @@ -1870,14 +1895,16 @@ "metadata": {}, "outputs": [], "source": [ - "#obtain the carbon impact in h3\n", + "# obtain the carbon impact in h3\n", "# do the h3 for the probability purchase area - user data\n", "\n", "# perform raster summary stats 
with the h3 feature\n", "# risk map\n", "start_time = time.time()\n", "hexbin_generator = generate_h3_features(geom, 7)\n", - "raster_path ='../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_4326.tif'\n", + "raster_path = (\n", + " \"../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_4326.tif\"\n", + ")\n", "\n", "raster_stats_h3 = gen_zonal_stats(\n", " hexbin_generator,\n", @@ -1885,7 +1912,7 @@ " stats=\"max\",\n", " prefix=\"purchase_area_\",\n", " geojson_out=True,\n", - " all_touched=True\n", + " all_touched=True,\n", ")\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] diff --git a/data/notebooks/Lab/5_Produce_mock_data.ipynb b/data/notebooks/Lab/5_Produce_mock_data.ipynb index 1b0aebd41..2139a8249 100644 --- a/data/notebooks/Lab/5_Produce_mock_data.ipynb +++ b/data/notebooks/Lab/5_Produce_mock_data.ipynb @@ -37,7 +37,6 @@ "source": [ "import geopandas as gpd\n", "import pandas as pd\n", - "\n", "from processing.geolocating_data import GeolocateAddress" ] }, @@ -47,14 +46,8 @@ "metadata": {}, "outputs": [], "source": [ - "import rasterio as rio\n", - "import numpy as np\n", - "import rasterio.plot\n", "import matplotlib.pyplot as plt\n", - "from rasterio.plot import show_hist\n", - "\n", - "\n", - "from rasterstats import gen_zonal_stats\n", + "import rasterio.plot\n", "from rasterstats import zonal_stats" ] }, @@ -64,12 +57,14 @@ "metadata": {}, "outputs": [], "source": [ + "import io\n", + "import os\n", + "\n", "# import lib\n", "import time\n", - "import os\n", - "import io\n", - "import requests\n", - "import zipfile" + "import zipfile\n", + "\n", + "import requests" ] }, { @@ -86,8 +81,8 @@ "metadata": {}, "outputs": [], "source": [ - "processed_path = '../../datasets/processed/'\n", - "raw_path = '../../datasets/raw/'" + "processed_path = \"../../datasets/processed/\"\n", + "raw_path = \"../../datasets/raw/\"" ] }, { @@ -220,8 +215,8 @@ } ], "source": [ - "#import user data\n", - "supply_data = gpd.read_file(processed_path + 'user_data/located_lg_data_polygon_v2.shp')\n", + "# import user data\n", + "supply_data = gpd.read_file(processed_path + \"user_data/located_lg_data_polygon_v2.shp\")\n", "supply_data.head()" ] }, @@ -231,7 +226,7 @@ "metadata": {}, "outputs": [], "source": [ - "#user data is in the 4326 projection\n", + "# user data is in the 4326 projection\n", "supply_data = supply_data.set_crs(\"EPSG:4326\")" ] }, @@ -252,8 +247,8 @@ } ], "source": [ - "#check unique commodities for calculating the risk maps\n", - "set(supply_data['Material'])" + "# check unique commodities for calculating the risk maps\n", + "set(supply_data[\"Material\"])" ] }, { @@ -305,24 +300,28 @@ } ], "source": [ - "#DOWNLOAD DEFAULT CROP DATA\n", - "url_ag_ha_2010 = 'https://s3.amazonaws.com/mapspam/2010/v2.0/geotiff/spam2010v2r0_global_harv_area.geotiff.zip'\n", - "url__ag_yield_2010 = 'https://s3.amazonaws.com/mapspam/2017/ssa/v2.1/geotiff/spam2017v2r1_ssa_yield.geotiff.zip'\n", - "local_path = '../../datasets/raw/crop_data/default_crop'\n", + "# DOWNLOAD DEFAULT CROP DATA\n", + "url_ag_ha_2010 = (\n", + " \"https://s3.amazonaws.com/mapspam/2010/v2.0/geotiff/spam2010v2r0_global_harv_area.geotiff.zip\"\n", + ")\n", + "url__ag_yield_2010 = (\n", + " \"https://s3.amazonaws.com/mapspam/2017/ssa/v2.1/geotiff/spam2017v2r1_ssa_yield.geotiff.zip\"\n", + ")\n", + "local_path = \"../../datasets/raw/crop_data/default_crop\"\n", "\n", "if not os.path.isdir(local_path):\n", " os.mkdir(local_path)\n", - 
"print('Downloading agr harvest area dataset...')\n", + "print(\"Downloading agr harvest area dataset...\")\n", "r = requests.get(url_ag_ha_2010)\n", "z = zipfile.ZipFile(io.BytesIO(r.content))\n", "print(\"Done harvest area!\")\n", - "z.extractall(path=local_path) # extract to folder\n", + "z.extractall(path=local_path) # extract to folder\n", "\n", - "print('Downloading agr yield dataset...')\n", + "print(\"Downloading agr yield dataset...\")\n", "r = requests.get(url__ag_yield_2010)\n", "z = zipfile.ZipFile(io.BytesIO(r.content))\n", "print(\"Done\")\n", - "z.extractall(path=local_path) # extract to folder\n", + "z.extractall(path=local_path) # extract to folder\n", "print(\"Done yield!\")" ] }, @@ -365,12 +364,12 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", + "# explore download raster - harvest area fraction\n", "a = rasterio.open(default_ag_ha_2010)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -403,12 +402,12 @@ } ], "source": [ - "#explore download raster - harvest area yield\n", + "# explore download raster - harvest area yield\n", "a = rasterio.open(default_ag_yield_2010)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -434,17 +433,19 @@ } ], "source": [ - "#DOWNLOAD DEFAULT PASTURE DATA:\n", - "url_ps_ha_2000 = 'https://s3.us-east-2.amazonaws.com/earthstatdata/CroplandPastureArea2000_Geotiff.zip'\n", - "local_path = '../../datasets/raw/crop_data/default_pasture'\n", + "# DOWNLOAD DEFAULT PASTURE DATA:\n", + "url_ps_ha_2000 = (\n", + " \"https://s3.us-east-2.amazonaws.com/earthstatdata/CroplandPastureArea2000_Geotiff.zip\"\n", + ")\n", + "local_path = \"../../datasets/raw/crop_data/default_pasture\"\n", "\n", "if not os.path.isdir(local_path):\n", " os.mkdir(local_path)\n", - "print('Downloading agr harvest area dataset...')\n", + "print(\"Downloading agr harvest area dataset...\")\n", "r = requests.get(url_ps_ha_2000)\n", "z = zipfile.ZipFile(io.BytesIO(r.content))\n", "print(\"Done harvest area pasture!\")\n", - "z.extractall(path=local_path) # extract to folder\n" + "z.extractall(path=local_path) # extract to folder" ] }, { @@ -478,12 +479,12 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", + "# explore download raster - harvest area fraction\n", "a = rasterio.open(default_pasture_ha_2000)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -678,7 +679,9 @@ } ], "source": [ - "pasture_yield_2000 = pd.read_csv('../../datasets/raw/crop_data/default_pasture/FAOSTAT_data_6-22-2021_yield.csv')\n", + "pasture_yield_2000 = pd.read_csv(\n", + " \"../../datasets/raw/crop_data/default_pasture/FAOSTAT_data_6-22-2021_yield.csv\"\n", + ")\n", "pasture_yield_2000.head()" ] }, @@ -1737,11 +1740,11 @@ "retrieved_geoms = []\n", "for i, row in pasture_yield_2000.iterrows():\n", " try:\n", - " geo_request = GeolocateAddress(query=row['Area'])\n", - " gdf = gpd.GeoDataFrame.from_features(geo_request.polygon_json, crs='epsg:4326')\n", - " geom = gdf['geometry'][0]\n", + " geo_request = GeolocateAddress(query=row[\"Area\"])\n", + " gdf = gpd.GeoDataFrame.from_features(geo_request.polygon_json, crs=\"epsg:4326\")\n", + " geom = gdf[\"geometry\"][0]\n", " except:\n", - 
" print(row['Area'])\n", + " print(row[\"Area\"])\n", " geom = None\n", " retrieved_geoms.append(geom)" ] @@ -1752,7 +1755,7 @@ "metadata": {}, "outputs": [], "source": [ - "pasture_yield_2000['geometry']=retrieved_geoms\n" + "pasture_yield_2000[\"geometry\"] = retrieved_geoms" ] }, { @@ -1938,7 +1941,7 @@ "metadata": {}, "outputs": [], "source": [ - "pasture_yield_2000 = pasture_yield_2000[pasture_yield_2000['geometry']!=None]" + "pasture_yield_2000 = pasture_yield_2000[pasture_yield_2000[\"geometry\"] is not None]" ] }, { @@ -1947,7 +1950,7 @@ "metadata": {}, "outputs": [], "source": [ - "pasture_yield_2000 = pasture_yield_2000.set_geometry('geometry')" + "pasture_yield_2000 = pasture_yield_2000.set_geometry(\"geometry\")" ] }, { @@ -1956,7 +1959,9 @@ "metadata": {}, "outputs": [], "source": [ - "pasture_yield_2000 = pasture_yield_2000[pasture_yield_2000['geometry'].apply(lambda x : x.type!='Point' )]" + "pasture_yield_2000 = pasture_yield_2000[\n", + " pasture_yield_2000[\"geometry\"].apply(lambda x: x.type != \"Point\")\n", + "]" ] }, { @@ -1974,7 +1979,10 @@ } ], "source": [ - "pasture_yield_2000.to_file(\"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.shp\", driver='ESRI Shapefile')" + "pasture_yield_2000.to_file(\n", + " \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.shp\",\n", + " driver=\"ESRI Shapefile\",\n", + ")" ] }, { @@ -2144,8 +2152,10 @@ } ], "source": [ - "#open the yield of pasture globally\n", - "default_pasture_yield_2000 = gpd.read_file(\"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.shp\")\n", + "# open the yield of pasture globally\n", + "default_pasture_yield_2000 = gpd.read_file(\n", + " \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.shp\"\n", + ")\n", "default_pasture_yield_2000.head()" ] }, @@ -2317,8 +2327,10 @@ } ], "source": [ - "pasture_per_hectare_2000 = pd.read_csv('../../datasets/raw/crop_data/default_pasture/FAOSTAT_data_6-23-2021_livestock_per_ha.csv')\n", - "pasture_per_hectare_2000.head() " + "pasture_per_hectare_2000 = pd.read_csv(\n", + " \"../../datasets/raw/crop_data/default_pasture/FAOSTAT_data_6-23-2021_livestock_per_ha.csv\"\n", + ")\n", + "pasture_per_hectare_2000.head()" ] }, { @@ -2510,21 +2522,23 @@ "animal_ha_list = []\n", "yield_t_ha_list = []\n", "for i, row in default_pasture_yield_2000.iterrows():\n", - " location = row['Area']\n", - " yield_hg_an = row['Value']\n", + " location = row[\"Area\"]\n", + " yield_hg_an = row[\"Value\"]\n", " try:\n", - " animal_ha = pasture_per_hectare_2000[pasture_per_hectare_2000['Area']==location].iloc[0]['Value']\n", - " \n", + " animal_ha = pasture_per_hectare_2000[pasture_per_hectare_2000[\"Area\"] == location].iloc[0][\n", + " \"Value\"\n", + " ]\n", + "\n", " except:\n", " animal_ha = 0\n", " # 0.0001 converts hg to tonnes\n", " yield_t_ha = float(yield_hg_an) * 0.0001 * float(animal_ha)\n", " animal_ha_list.append(animal_ha)\n", " yield_t_ha_list.append(yield_t_ha)\n", - " \n", + "\n", "##append to main geodataframe\n", - "default_pasture_yield_2000['animal_ha'] = animal_ha_list\n", - "default_pasture_yield_2000['yield_t_ha'] = yield_t_ha_list\n", + "default_pasture_yield_2000[\"animal_ha\"] = animal_ha_list\n", + "default_pasture_yield_2000[\"yield_t_ha\"] = yield_t_ha_list\n", "default_pasture_yield_2000.head()" ] }, @@ -2534,8 +2548,11 @@ "metadata": {}, "outputs": [], "source": [ - "#save 
file\n", - "default_pasture_yield_2000.to_file(\"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.shp\", driver='ESRI Shapefile')" + "# save file\n", + "default_pasture_yield_2000.to_file(\n", + " \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.shp\",\n", + " driver=\"ESRI Shapefile\",\n", + ")" ] }, { @@ -2552,7 +2569,7 @@ } ], "source": [ - "#convert file to raster with same extent and resolution that the pasture harvset area\n", + "# convert file to raster with same extent and resolution that the pasture harvset area\n", "!gdal_rasterize -l Pasture2000_5m_yield -a yield_t_ha -tr 0.083333 0.083333 -a_nodata 0.0 -te -180.0 -90.0 180.0 90.0 -ot Float32 -of GTiff \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.shp\" \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.tif\"" ] }, @@ -2570,10 +2587,10 @@ } ], "source": [ - "#get the generated yield just for those areas whete the harvest area is greater than 0\n", + "# get the generated yield just for those areas whete the harvest area is greater than 0\n", "# 1. produce a harvest area pasture with just 1 values\n", "default_pasture_ha_2000 = \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_ext_v2.tif\"\n", - "!gdal_calc.py -A $default_pasture_ha_2000 --outfile=\"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_normalised_val_v2.tif\" --calc=\"(A>0)\" --NoDataValue -3.402823e+38 " + "!gdal_calc.py -A $default_pasture_ha_2000 --outfile=\"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_normalised_val_v2.tif\" --calc=\"(A>0)\" --NoDataValue -3.402823e+38" ] }, { @@ -2590,8 +2607,8 @@ } ], "source": [ - "#multiply pasture yield with the normalised raster\n", - "!gdal_calc.py -A \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_normalised_val_v2.tif\" -B \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.tif\" --outfile=\"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v4.tif\" --calc=\"A*B\" --NoDataValue -3.402823e+38 " + "# multiply pasture yield with the normalised raster\n", + "!gdal_calc.py -A \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_normalised_val_v2.tif\" -B \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.tif\" --outfile=\"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v4.tif\" --calc=\"A*B\" --NoDataValue -3.402823e+38" ] }, { @@ -2623,13 +2640,13 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", + "# explore download raster - harvest area fraction\n", "default_pasture_y_2000 = \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v4.tif\"\n", "a = rasterio.open(default_pasture_y_2000)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -2646,8 +2663,8 @@ "metadata": {}, "outputs": [], "source": [ - "cotton_2000_ha_4326 = '../../datasets/raw/crop_data/cotton/cotton_HarvestedAreaFraction.tif'\n", - "cotton_2000_y_4326 = 
'../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare.tif'" + "cotton_2000_ha_4326 = \"../../datasets/raw/crop_data/cotton/cotton_HarvestedAreaFraction.tif\"\n", + "cotton_2000_y_4326 = \"../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare.tif\"" ] }, { @@ -2679,12 +2696,12 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", + "# explore download raster - harvest area fraction\n", "a = rasterio.open(cotton_2000_ha_4326)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -2717,12 +2734,12 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", + "# explore download raster - harvest area fraction\n", "a = rasterio.open(cotton_2000_y_4326)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -2739,8 +2756,8 @@ "metadata": {}, "outputs": [], "source": [ - "rubber_2000_ha_4326 = '../../datasets/raw/crop_data/rubber/rubber_HarvestedAreaFraction.tif'\n", - "rubber_2000_y_4326 = '../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare.tif'" + "rubber_2000_ha_4326 = \"../../datasets/raw/crop_data/rubber/rubber_HarvestedAreaFraction.tif\"\n", + "rubber_2000_y_4326 = \"../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare.tif\"" ] }, { @@ -2772,12 +2789,12 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", + "# explore download raster - harvest area fraction\n", "a = rasterio.open(rubber_2000_ha_4326)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -2810,12 +2827,12 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", + "# explore download raster - harvest area fraction\n", "a = rasterio.open(rubber_2000_y_4326)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -2859,16 +2876,16 @@ "outputs": [], "source": [ "# download generic water footprint\n", - "url = 'https://data.4tu.nl/articles/dataset/The_green_blue_grey_and_total_water_footprint_related_to_production/12675440'\n", - "local_path = '../../datasets/raw/water_indicators/'\n", + "url = \"https://data.4tu.nl/articles/dataset/The_green_blue_grey_and_total_water_footprint_related_to_production/12675440\"\n", + "local_path = \"../../datasets/raw/water_indicators/\"\n", "\n", - "#if not os.path.isdir(local_path):\n", + "# if not os.path.isdir(local_path):\n", "# os.mkdir(local_path)\n", - "#print('Downloading dataset...')\n", - "#r = requests.get(url)\n", - "#z = zipfile.ZipFile(io.BytesIO(r.content))\n", - "#print(\"Done\")\n", - "#z.extractall(path=local_path) # extract to folder" + "# print('Downloading dataset...')\n", + "# r = requests.get(url)\n", + "# z = zipfile.ZipFile(io.BytesIO(r.content))\n", + "# print(\"Done\")\n", + "# z.extractall(path=local_path) # extract to folder" ] }, { @@ -2894,7 +2911,9 @@ "metadata": {}, "outputs": [], "source": [ - "default_blwf_1996_2005 = '../../datasets/raw/water_indicators/Report50-WF-of-prodn-RasterFiles/wf_totagr_mm/hdr.adf'" + "default_blwf_1996_2005 = (\n", + " \"../../datasets/raw/water_indicators/Report50-WF-of-prodn-RasterFiles/wf_totagr_mm/hdr.adf\"\n", + ")" ] }, { @@ -2930,7 +2949,7 @@ 
"print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Blues')\n", + "plt.imshow(a.read(1), cmap=\"Blues\")\n", "plt.show()" ] }, @@ -2963,7 +2982,7 @@ } ], "source": [ - "#change extent in blwf generic data\n", + "# change extent in blwf generic data\n", "!gdal_translate -projwin -180.0 90.0 180.0 -90.0 -of GTiff '../../datasets/raw/water_indicators/Report50-WF-of-prodn-RasterFiles/wf_totagr_mm/hdr.adf' '../../datasets/raw/water_indicators/Report50-WF-of-prodn-RasterFiles/wf_totagr_mm/hdr_ext_v2.tif'" ] }, @@ -2982,7 +3001,7 @@ } ], "source": [ - "#change extent in default aggricuture harvest area fraction \n", + "# change extent in default aggricuture harvest area fraction\n", "!gdal_translate -projwin -180.0 90.0 180.0 -90.0 -of GTiff \"../../datasets/raw/crop_data/default_crop/spam2010V2r0_global_H_REST_A.tif\" \"../../datasets/raw/crop_data/default_crop/spam2010V2r0_global_H_REST_A_ext_v2.tif\"" ] }, @@ -3001,7 +3020,7 @@ } ], "source": [ - "#change extent in default crop yield\n", + "# change extent in default crop yield\n", "!gdal_translate -projwin -180.0 90.0 180.0 -90.0 -of GTiff \"../../datasets/raw/crop_data/default_crop/spam2017V2r1_SSA_Y_REST_A.tif\" \"../../datasets/raw/crop_data/default_crop/spam2017V2r1_SSA_Y_REST_A_ext_v2.tif\"" ] }, @@ -3020,8 +3039,8 @@ } ], "source": [ - "#change extent in default pasture harvest area fraction\n", - "!gdal_translate -projwin -180.0 90.0 180.0 -90.0 -of GTiff \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m.tif\" \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_ext_v2.tif\" " + "# change extent in default pasture harvest area fraction\n", + "!gdal_translate -projwin -180.0 90.0 180.0 -90.0 -of GTiff \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m.tif\" \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_ext_v2.tif\"" ] }, { @@ -3037,9 +3056,13 @@ "metadata": {}, "outputs": [], "source": [ - "default_blwf_1996_2005_ext = '../../datasets/raw/water_indicators/Report50-WF-of-prodn-RasterFiles/wf_totagr_mm/hdr_ext_v2.tif'\n", - "default_ag_ha_2010 = \"../../datasets/raw/crop_data/default_crop/spam2010V2r0_global_H_REST_A_ext_v2.tif\"\n", - "default_ag_yield_2010 = \"../../datasets/raw/crop_data/default_crop/spam2017V2r1_SSA_Y_REST_A_ext_v2.tif\"\n", + "default_blwf_1996_2005_ext = \"../../datasets/raw/water_indicators/Report50-WF-of-prodn-RasterFiles/wf_totagr_mm/hdr_ext_v2.tif\"\n", + "default_ag_ha_2010 = (\n", + " \"../../datasets/raw/crop_data/default_crop/spam2010V2r0_global_H_REST_A_ext_v2.tif\"\n", + ")\n", + "default_ag_yield_2010 = (\n", + " \"../../datasets/raw/crop_data/default_crop/spam2017V2r1_SSA_Y_REST_A_ext_v2.tif\"\n", + ")\n", "default_pasture_ha_2000 = \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_ext_v2.tif\"\n", "default_pasture_y_2000 = \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v4.tif\"" ] @@ -3073,12 +3096,12 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", + "# explore download raster - harvest area fraction\n", "a = rasterio.open(default_blwf_1996_2005_ext)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -3111,12 +3134,12 @@ } ], "source": [ - 
"#explore download raster - harvest area fraction\n", + "# explore download raster - harvest area fraction\n", "a = rasterio.open(default_ag_yield_2010)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -3149,12 +3172,12 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", + "# explore download raster - harvest area fraction\n", "a = rasterio.open(default_ag_ha_2010)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -3187,12 +3210,12 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", + "# explore download raster - harvest area fraction\n", "a = rasterio.open(default_pasture_ha_2000)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -3225,12 +3248,12 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", + "# explore download raster - harvest area fraction\n", "a = rasterio.open(default_pasture_y_2000)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -3262,7 +3285,7 @@ } ], "source": [ - "#remove 0 from yield to avoid inf values in default crop\n", + "# remove 0 from yield to avoid inf values in default crop\n", "!gdal_calc.py -A \"../../datasets/raw/crop_data/default_crop/spam2017V2r1_SSA_Y_REST_A_ext_v2.tif\" --outfile=\"../../datasets/raw/crop_data/default_crop/spam2017V2r1_SSA_Y_REST_A_ext_v2_NoZero.tif\" --calc=\"A/(A!=0)\"" ] }, @@ -3295,12 +3318,14 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", - "a = rasterio.open(\"../../datasets/raw/crop_data/default_crop/spam2017V2r1_SSA_Y_REST_A_ext_v2_NoZero.tif\")\n", + "# explore download raster - harvest area fraction\n", + "a = rasterio.open(\n", + " \"../../datasets/raw/crop_data/default_crop/spam2017V2r1_SSA_Y_REST_A_ext_v2_NoZero.tif\"\n", + ")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -3319,8 +3344,8 @@ } ], "source": [ - "#remove the zeros to the pasture yield to avoid inf results\n", - "!gdal_calc.py -A \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v4.tif\" --outfile=\"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v5_NoZeros.tif\" --calc=\"A/(A!=0)\" --NoDataValue -3.402823e+38 " + "# remove the zeros to the pasture yield to avoid inf results\n", + "!gdal_calc.py -A \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v4.tif\" --outfile=\"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v5_NoZeros.tif\" --calc=\"A/(A!=0)\" --NoDataValue -3.402823e+38" ] }, { @@ -3352,12 +3377,14 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", - "a = rasterio.open(\"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v5_NoZeros.tif\")\n", + "# explore download raster - harvest area fraction\n", + "a = rasterio.open(\n", + " 
\"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v5_NoZeros.tif\"\n", + ")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -3377,7 +3404,7 @@ ], "source": [ "# remove 0 in cotton to avoid inf results\n", - "!gdal_calc.py -A '../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare.tif' --outfile='../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare_NoZeris.tif' --calc=\"A/(A!=0)\" --NoDataValue -3.402823e+38 " + "!gdal_calc.py -A '../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare.tif' --outfile='../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare_NoZeris.tif' --calc=\"A/(A!=0)\" --NoDataValue -3.402823e+38" ] }, { @@ -3409,12 +3436,12 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", - "a = rasterio.open('../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare_NoZeris.tif')\n", + "# explore download raster - harvest area fraction\n", + "a = rasterio.open(\"../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare_NoZeris.tif\")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -3434,7 +3461,7 @@ ], "source": [ "# remove 0 in rubber to avoid inf results\n", - "!gdal_calc.py -A '../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare.tif' --outfile='../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare_NoZeros.tif' --calc=\"A/(A!=0)\" --NoDataValue -3.402823e+38 " + "!gdal_calc.py -A '../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare.tif' --outfile='../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare_NoZeros.tif' --calc=\"A/(A!=0)\" --NoDataValue -3.402823e+38" ] }, { @@ -3466,12 +3493,12 @@ } ], "source": [ - "#explore download raster - harvest area fraction\n", - "a = rasterio.open('../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare_NoZeros.tif')\n", + "# explore download raster - harvest area fraction\n", + "a = rasterio.open(\"../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare_NoZeros.tif\")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Greens')\n", + "plt.imshow(a.read(1), cmap=\"Greens\")\n", "plt.show()" ] }, @@ -3481,15 +3508,19 @@ "metadata": {}, "outputs": [], "source": [ - "default_blwf_1996_2005_ext = '../../datasets/raw/water_indicators/Report50-WF-of-prodn-RasterFiles/wf_totagr_mm/hdr_ext_v2.tif'\n", - "default_ag_ha_2010 = \"../../datasets/raw/crop_data/default_crop/spam2010V2r0_global_H_REST_A_ext_v2.tif\"\n", - "default_ag_yield_2010 = \"../../datasets/raw/crop_data/default_crop/spam2017V2r1_SSA_Y_REST_A_ext_v2_NoZero.tif\" \n", + "default_blwf_1996_2005_ext = \"../../datasets/raw/water_indicators/Report50-WF-of-prodn-RasterFiles/wf_totagr_mm/hdr_ext_v2.tif\"\n", + "default_ag_ha_2010 = (\n", + " \"../../datasets/raw/crop_data/default_crop/spam2010V2r0_global_H_REST_A_ext_v2.tif\"\n", + ")\n", + "default_ag_yield_2010 = (\n", + " \"../../datasets/raw/crop_data/default_crop/spam2017V2r1_SSA_Y_REST_A_ext_v2_NoZero.tif\"\n", + ")\n", "default_pasture_ha_2000 = \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_ext_v2.tif\"\n", "default_pasture_y_2000 = \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v5_NoZeros.tif\"\n", - "cotton_y_2000 = 
'../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare_NoZeris.tif'\n", - "cotton_ha_2000 = '../../datasets/raw/crop_data/cotton/cotton_HarvestedAreaFraction.tif'\n", - "rubber_y_2000 = '../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare_NoZeros.tif'\n", - "rubber_ha_2000 = '../../datasets/raw/crop_data/rubber/rubber_HarvestedAreaFraction.tif'" + "cotton_y_2000 = \"../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare_NoZeris.tif\"\n", + "cotton_ha_2000 = \"../../datasets/raw/crop_data/cotton/cotton_HarvestedAreaFraction.tif\"\n", + "rubber_y_2000 = \"../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare_NoZeros.tif\"\n", + "rubber_ha_2000 = \"../../datasets/raw/crop_data/rubber/rubber_HarvestedAreaFraction.tif\"" ] }, { @@ -3516,7 +3547,7 @@ } ], "source": [ - "#CALCULATE DEFAULT AGRICULTURAL RISK\n", + "# CALCULATE DEFAULT AGRICULTURAL RISK\n", "!gdal_calc.py -A $default_blwf_1996_2005_ext -B $default_ag_ha_2010 -C $default_ag_yield_2010 --outfile='../../datasets/processed/water_indicators/water_risk_4323_2010_v2.tif' --calc=\"A *B*(1/C)*(10000/1000)\"" ] }, @@ -3549,13 +3580,15 @@ } ], "source": [ - "#explore default aggricultural risk map\n", - "default_aggri_risk_2010 = \"../../datasets/processed/water_indicators/water_risk_aggriculture_4326_2010_v2.tif\"\n", + "# explore default aggricultural risk map\n", + "default_aggri_risk_2010 = (\n", + " \"../../datasets/processed/water_indicators/water_risk_aggriculture_4326_2010_v2.tif\"\n", + ")\n", "a = rasterio.open(default_aggri_risk_2010)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -3584,7 +3617,7 @@ ], "source": [ "## the results from gdal cal produce inf results\n", - "!gdal_calc.py -A $default_blwf_1996_2005_ext -B $default_pasture_ha_2000 -C $default_pasture_y_2000 --outfile='../../datasets/processed/water_indicators/water_risk_pasture_4326_2000_v9.tif' --calc=\"A*B*(1/C)*(10000/1000)\" --NoDataValue -3.402823e+38 " + "!gdal_calc.py -A $default_blwf_1996_2005_ext -B $default_pasture_ha_2000 -C $default_pasture_y_2000 --outfile='../../datasets/processed/water_indicators/water_risk_pasture_4326_2000_v9.tif' --calc=\"A*B*(1/C)*(10000/1000)\" --NoDataValue -3.402823e+38" ] }, { @@ -3625,13 +3658,15 @@ } ], "source": [ - "#explore default pasture risk map\n", - "default_pasture_risk_2000 = \"../../datasets/processed/water_indicators/water_risk_pasture_4326_2000_v10.tif\"\n", + "# explore default pasture risk map\n", + "default_pasture_risk_2000 = (\n", + " \"../../datasets/processed/water_indicators/water_risk_pasture_4326_2000_v10.tif\"\n", + ")\n", "a = rasterio.open(default_pasture_risk_2000)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -3659,7 +3694,7 @@ } ], "source": [ - "#calculate global water risk for cotton in 2000\n", + "# calculate global water risk for cotton in 2000\n", "!gdal_calc.py -A $default_blwf_1996_2005_ext -B $cotton_ha_2000 -C $cotton_y_2000 --outfile='../../datasets/processed/water_indicators/water_risk_cotton_4326_2000.tif' --calc=\"A *B*(1/C)*(10000/1000)\"" ] }, @@ -3692,12 +3727,12 @@ } ], "source": [ - "#explore default pasture risk map\n", - "a = rasterio.open('../../datasets/processed/water_indicators/water_risk_cotton_4326_2000.tif')\n", + "# explore default pasture risk map\n", + "a = 
rasterio.open(\"../../datasets/processed/water_indicators/water_risk_cotton_4326_2000.tif\")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -3725,7 +3760,7 @@ } ], "source": [ - "#calculate global water risk for rubber in 2000\n", + "# calculate global water risk for rubber in 2000\n", "!gdal_calc.py -A $default_blwf_1996_2005_ext -B $rubber_ha_2000 -C $rubber_y_2000 --outfile='../../datasets/processed/water_indicators/water_risk_rubber_4326_2000.tif' --calc=\"A *B*(1/C)*(10000/1000)\"" ] }, @@ -3758,12 +3793,12 @@ } ], "source": [ - "#explore default pasture risk map\n", - "a = rasterio.open('../../datasets/processed/water_indicators/water_risk_rubber_4326_2000.tif')\n", + "# explore default pasture risk map\n", + "a = rasterio.open(\"../../datasets/processed/water_indicators/water_risk_rubber_4326_2000.tif\")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -3887,8 +3922,10 @@ } ], "source": [ - "##open the csv \n", - "deforestation = gpd.read_file('../../datasets/raw/deforestation_indicators/annual-change-forest-area.csv')\n", + "##open the csv\n", + "deforestation = gpd.read_file(\n", + " \"../../datasets/raw/deforestation_indicators/annual-change-forest-area.csv\"\n", + ")\n", "deforestation.head()" ] }, @@ -3985,8 +4022,8 @@ } ], "source": [ - "#filter those whose year is 2000\n", - "deforestation = deforestation[deforestation['Year']=='2000']\n", + "# filter those whose year is 2000\n", + "deforestation = deforestation[deforestation[\"Year\"] == \"2000\"]\n", "deforestation.head()" ] }, @@ -4184,15 +4221,17 @@ "source": [ "deforestation_ha_list = []\n", "for i, row in default_pasture_yield_2000.iterrows():\n", - " location = row['Area']\n", + " location = row[\"Area\"]\n", " try:\n", - " deforestation_ha = deforestation[deforestation['Entity']==location].iloc[0]['Net forest conversion'] \n", + " deforestation_ha = deforestation[deforestation[\"Entity\"] == location].iloc[0][\n", + " \"Net forest conversion\"\n", + " ]\n", " except:\n", " deforestation_ha = 0\n", " deforestation_ha_list.append(deforestation_ha)\n", - " \n", + "\n", "##append to main geodataframe\n", - "default_pasture_yield_2000['deforestation_ha'] = deforestation_ha_list\n", + "default_pasture_yield_2000[\"deforestation_ha\"] = deforestation_ha_list\n", "default_pasture_yield_2000.head()" ] }, @@ -4211,8 +4250,11 @@ } ], "source": [ - "# save file \n", - "default_pasture_yield_2000.to_file(\"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.shp\", driver='ESRI Shapefile')" + "# save file\n", + "default_pasture_yield_2000.to_file(\n", + " \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.shp\",\n", + " driver=\"ESRI Shapefile\",\n", + ")" ] }, { @@ -4230,7 +4272,7 @@ ], "source": [ "# rasterise to the same resolution\n", - "#convert file to raster with same extent and resolution that the pasture harvset area\n", + "# convert file to raster with same extent and resolution that the pasture harvset area\n", "!gdal_rasterize -l Pasture2000_5m_yield -a deforestat -tr 0.083333 0.083333 -a_nodata 0.0 -te -180.0 -90.0 180.0 90.0 -ot Float32 -of GTiff \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield.shp\" 
\"../../datasets/raw/deforestation_indicators/global_deforestation_2000_ha_4326.tif\"" ] }, @@ -4240,7 +4282,7 @@ "metadata": {}, "outputs": [], "source": [ - "#set projection to dataset\n", + "# set projection to dataset\n", "!gdal_edit.py -a_srs EPSG:4326 \"../../datasets/raw/deforestation_indicators/global_deforestation_2000_ha_4326.tif\"" ] }, @@ -4263,15 +4305,19 @@ "metadata": {}, "outputs": [], "source": [ - "default_blwf_1996_2005_ext = '../../datasets/raw/water_indicators/Report50-WF-of-prodn-RasterFiles/wf_totagr_mm/hdr_ext_v2.tif'\n", - "default_ag_ha_2010 = \"../../datasets/raw/crop_data/default_crop/spam2010V2r0_global_H_REST_A_ext_v2.tif\"\n", - "default_ag_yield_2010 = \"../../datasets/raw/crop_data/default_crop/spam2017V2r1_SSA_Y_REST_A_ext_v2_NoZero.tif\" \n", + "default_blwf_1996_2005_ext = \"../../datasets/raw/water_indicators/Report50-WF-of-prodn-RasterFiles/wf_totagr_mm/hdr_ext_v2.tif\"\n", + "default_ag_ha_2010 = (\n", + " \"../../datasets/raw/crop_data/default_crop/spam2010V2r0_global_H_REST_A_ext_v2.tif\"\n", + ")\n", + "default_ag_yield_2010 = (\n", + " \"../../datasets/raw/crop_data/default_crop/spam2017V2r1_SSA_Y_REST_A_ext_v2_NoZero.tif\"\n", + ")\n", "default_pasture_ha_2000 = \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_ext_v2.tif\"\n", "default_pasture_y_2000 = \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v5_NoZeros.tif\"\n", - "cotton_y_2000 = '../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare_NoZeris.tif'\n", - "cotton_ha_2000 = '../../datasets/raw/crop_data/cotton/cotton_HarvestedAreaFraction.tif'\n", - "rubber_y_2000 = '../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare_NoZeros.tif'\n", - "rubber_ha_2000 = '../../datasets/raw/crop_data/rubber/rubber_HarvestedAreaFraction.tif'" + "cotton_y_2000 = \"../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare_NoZeris.tif\"\n", + "cotton_ha_2000 = \"../../datasets/raw/crop_data/cotton/cotton_HarvestedAreaFraction.tif\"\n", + "rubber_y_2000 = \"../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare_NoZeros.tif\"\n", + "rubber_ha_2000 = \"../../datasets/raw/crop_data/rubber/rubber_HarvestedAreaFraction.tif\"" ] }, { @@ -4280,7 +4326,9 @@ "metadata": {}, "outputs": [], "source": [ - "total_deforestation = \"../../datasets/raw/deforestation_indicators/global_deforestation_2000_ha_4326.tif\"" + "total_deforestation = (\n", + " \"../../datasets/raw/deforestation_indicators/global_deforestation_2000_ha_4326.tif\"\n", + ")" ] }, { @@ -4320,7 +4368,7 @@ } ], "source": [ - "#calculate global cotton deforestation risk in 2000\n", + "# calculate global cotton deforestation risk in 2000\n", "!gdal_calc.py -A $total_deforestation -B $cotton_ha_2000 -C $cotton_y_2000 --outfile='../../datasets/processed/deforestation_indicators/deforestation_risk_cotton_4326_2000.tif' --calc=\"A*B *(1/C)*0.0001\"" ] }, @@ -4353,12 +4401,14 @@ } ], "source": [ - "#explore default cotton risk map\n", - "a = rasterio.open('../../datasets/processed/deforestation_indicators/deforestation_risk_cotton_4326_2000.tif')\n", + "# explore default cotton risk map\n", + "a = rasterio.open(\n", + " \"../../datasets/processed/deforestation_indicators/deforestation_risk_cotton_4326_2000.tif\"\n", + ")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -4383,7 +4433,7 @@ } ], "source": [ - "#calculate global 
cotton deforestation risk in 2000\n", + "# calculate global cotton deforestation risk in 2000\n", "!gdal_calc.py -A $total_deforestation -B $rubber_ha_2000 -C $rubber_y_2000 --outfile='../../datasets/processed/deforestation_indicators/deforestation_risk_rubber_4326_2000.tif' --calc=\"A*B *(1/C)*0.0001\"" ] }, @@ -4416,12 +4466,14 @@ } ], "source": [ - "#explore default rubber risk map\n", - "a = rasterio.open('../../datasets/processed/deforestation_indicators/deforestation_risk_rubber_4326_2000.tif')\n", + "# explore default rubber risk map\n", + "a = rasterio.open(\n", + " \"../../datasets/processed/deforestation_indicators/deforestation_risk_rubber_4326_2000.tif\"\n", + ")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -4522,7 +4574,7 @@ } ], "source": [ - "#calculate global cotton deforestation risk in 2000\n", + "# calculate global cotton deforestation risk in 2000\n", "!gdal_calc.py -A $total_deforestation -B \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_ext_v2_NoData.tif\" -C \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v5_NoZeros_NoData.tif\" --outfile='../../datasets/processed/deforestation_indicators/deforestation_risk_pasture_4326_2000_v2.tif' --calc=\"A*B*(1/C)*0.0001\"" ] }, @@ -4555,12 +4607,14 @@ } ], "source": [ - "#explore default pasture risk map\n", - "a = rasterio.open('../../datasets/processed/deforestation_indicators/deforestation_risk_pasture_4326_2000_v2.tif')\n", + "# explore default pasture risk map\n", + "a = rasterio.open(\n", + " \"../../datasets/processed/deforestation_indicators/deforestation_risk_pasture_4326_2000_v2.tif\"\n", + ")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -4602,10 +4656,18 @@ "metadata": {}, "outputs": [], "source": [ - "total_carbon_emissions_2000 = '../../datasets/raw/carbon_indicators/GHGEmissions_Geotiff/total_emissions.tif'\n", - "cotton_deforestation_risk_2000 = '../../datasets/processed/deforestation_indicators/deforestation_risk_cotton_4326_2000.tif'\n", - "rubber_deforestation_risk_2000 = '../../datasets/processed/deforestation_indicators/deforestation_risk_rubber_4326_2000.tif'\n", - "pasture_deforestation_risk_2000 = '../../datasets/processed/deforestation_indicators/deforestation_risk_pasture_4326_2000.tif'" + "total_carbon_emissions_2000 = (\n", + " \"../../datasets/raw/carbon_indicators/GHGEmissions_Geotiff/total_emissions.tif\"\n", + ")\n", + "cotton_deforestation_risk_2000 = (\n", + " \"../../datasets/processed/deforestation_indicators/deforestation_risk_cotton_4326_2000.tif\"\n", + ")\n", + "rubber_deforestation_risk_2000 = (\n", + " \"../../datasets/processed/deforestation_indicators/deforestation_risk_rubber_4326_2000.tif\"\n", + ")\n", + "pasture_deforestation_risk_2000 = (\n", + " \"../../datasets/processed/deforestation_indicators/deforestation_risk_pasture_4326_2000.tif\"\n", + ")" ] }, { @@ -4637,12 +4699,12 @@ } ], "source": [ - "#explore default pasture risk map\n", + "# explore default pasture risk map\n", "a = rasterio.open(total_carbon_emissions_2000)\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -4708,7 +4770,7 @@ } ], 
"source": [ - "#calculate global cotton deforestation risk in 2000\n", + "# calculate global cotton deforestation risk in 2000\n", "!gdal_calc.py -A $total_carbon_emissions_2000 -B '../../datasets/processed/deforestation_indicators/deforestation_risk_cotton_4326_2000_NoData.tif' --outfile='../../datasets/processed/carbon_indicators/carbon_emissions_risk_cotton_4326_2000.tif' --calc=\"A*B\"" ] }, @@ -4741,12 +4803,14 @@ } ], "source": [ - "#explore default pasture risk map\n", - "a = rasterio.open('../../datasets/processed/carbon_indicators/carbon_emissions_risk_cotton_4326_2000.tif')\n", + "# explore default pasture risk map\n", + "a = rasterio.open(\n", + " \"../../datasets/processed/carbon_indicators/carbon_emissions_risk_cotton_4326_2000.tif\"\n", + ")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -4808,7 +4872,7 @@ } ], "source": [ - "#calculate global cotton deforestation risk in 2000\n", + "# calculate global cotton deforestation risk in 2000\n", "!gdal_calc.py -A $total_carbon_emissions_2000 -B '../../datasets/processed/deforestation_indicators/deforestation_risk_rubber_4326_2000_NoData.tif' --outfile='../../datasets/processed/carbon_indicators/carbon_emissions_risk_rubber_4326_2000.tif' --calc=\"A*B\"" ] }, @@ -4841,12 +4905,14 @@ } ], "source": [ - "#explore default pasture risk map\n", - "a = rasterio.open('../../datasets/processed/carbon_indicators/carbon_emissions_risk_rubber_4326_2000.tif')\n", + "# explore default pasture risk map\n", + "a = rasterio.open(\n", + " \"../../datasets/processed/carbon_indicators/carbon_emissions_risk_rubber_4326_2000.tif\"\n", + ")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -4938,12 +5004,14 @@ } ], "source": [ - "#explore default pasture risk map\n", - "a = rasterio.open('../../datasets/processed/carbon_indicators/carbon_emissions_risk_pasture_4326_2000_v2.tif')\n", + "# explore default pasture risk map\n", + "a = rasterio.open(\n", + " \"../../datasets/processed/carbon_indicators/carbon_emissions_risk_pasture_4326_2000_v2.tif\"\n", + ")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -4975,7 +5043,7 @@ "metadata": {}, "outputs": [], "source": [ - "taxa_cf_2000 = '../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326.tif'" + "taxa_cf_2000 = \"../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326.tif\"" ] }, { @@ -5011,7 +5079,7 @@ "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -5061,11 +5129,13 @@ } ], "source": [ - "a = rasterio.open('../../datasets/processed/biodiversity_indicators/biodiversity_loss_risk_cotton_4326_2000.tif')\n", + "a = rasterio.open(\n", + " \"../../datasets/processed/biodiversity_indicators/biodiversity_loss_risk_cotton_4326_2000.tif\"\n", + ")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -5122,11 +5192,13 @@ } ], "source": [ - "a = 
rasterio.open('../../datasets/processed/biodiversity_indicators/biodiversity_loss_risk_rubber_4326_2000.tif')\n", + "a = rasterio.open(\n", + " \"../../datasets/processed/biodiversity_indicators/biodiversity_loss_risk_rubber_4326_2000.tif\"\n", + ")\n", "print(a.crs)\n", "print(a.bounds)\n", "print(a.profile)\n", - "plt.imshow(a.read(1), cmap='Oranges')\n", + "plt.imshow(a.read(1), cmap=\"Oranges\")\n", "plt.show()" ] }, @@ -5171,22 +5243,30 @@ "outputs": [], "source": [ "## ruber\n", - "rubber_water_risk = '../../datasets/processed/water_indicators/water_risk_rubber_4326_2000_v2.tif'\n", - "rubber_deforestation_risk = '../../datasets/processed/deforestation_indicators/deforestation_risk_rubber_4326_2000_NoData.tif'\n", - "rubber_carbon_risk = '../../datasets/processed/carbon_indicators/carbon_emissions_risk_rubber_4326_2000_v2.tif'\n", - "rubber_biodiversity_risk = '../../datasets/processed/biodiversity_indicators/biodiversity_loss_risk_rubber_4326_2000_v2.tif'\n", + "rubber_water_risk = \"../../datasets/processed/water_indicators/water_risk_rubber_4326_2000_v2.tif\"\n", + "rubber_deforestation_risk = \"../../datasets/processed/deforestation_indicators/deforestation_risk_rubber_4326_2000_NoData.tif\"\n", + "rubber_carbon_risk = (\n", + " \"../../datasets/processed/carbon_indicators/carbon_emissions_risk_rubber_4326_2000_v2.tif\"\n", + ")\n", + "rubber_biodiversity_risk = \"../../datasets/processed/biodiversity_indicators/biodiversity_loss_risk_rubber_4326_2000_v2.tif\"\n", "\n", "# cotton\n", - "cotton_water_risk = '../../datasets/processed/water_indicators/water_risk_cotton_4326_2000_v2.tif'\n", - "cotton_deforestation_risk = '../../datasets/processed/deforestation_indicators/deforestation_risk_cotton_4326_2000_NoData.tif'\n", - "cotton_carbon_risk = '../../datasets/processed/carbon_indicators/carbon_emissions_risk_cotton_4326_2000_v2.tif'\n", - "cotton_biodiversity_risk = '../../datasets/processed/biodiversity_indicators/biodiversity_loss_risk_cotton_4326_2000_v2.tif'\n", + "cotton_water_risk = \"../../datasets/processed/water_indicators/water_risk_cotton_4326_2000_v2.tif\"\n", + "cotton_deforestation_risk = \"../../datasets/processed/deforestation_indicators/deforestation_risk_cotton_4326_2000_NoData.tif\"\n", + "cotton_carbon_risk = (\n", + " \"../../datasets/processed/carbon_indicators/carbon_emissions_risk_cotton_4326_2000_v2.tif\"\n", + ")\n", + "cotton_biodiversity_risk = \"../../datasets/processed/biodiversity_indicators/biodiversity_loss_risk_cotton_4326_2000_v2.tif\"\n", "\n", "# pasture\n", - "pasture_water_risk = \"../../datasets/processed/water_indicators/water_risk_pasture_4326_2000_v11.tif\"\n", - "pasture_deforestation_risk = '../../datasets/processed/deforestation_indicators/deforestation_risk_pasture_4326_2000_NoData_v2.tif'\n", - "pasture_carbon_risk = '../../datasets/processed/carbon_indicators/carbon_emissions_risk_pasture_4326_2000_v3.tif'\n", - "pasture_biodiversity_risk = '../../datasets/processed/biodiversity_indicators/biodiversity_loss_risk_pasture_4326_2000_v3.tif'" + "pasture_water_risk = (\n", + " \"../../datasets/processed/water_indicators/water_risk_pasture_4326_2000_v11.tif\"\n", + ")\n", + "pasture_deforestation_risk = \"../../datasets/processed/deforestation_indicators/deforestation_risk_pasture_4326_2000_NoData_v2.tif\"\n", + "pasture_carbon_risk = (\n", + " \"../../datasets/processed/carbon_indicators/carbon_emissions_risk_pasture_4326_2000_v3.tif\"\n", + ")\n", + "pasture_biodiversity_risk = 
\"../../datasets/processed/biodiversity_indicators/biodiversity_loss_risk_pasture_4326_2000_v3.tif\"" ] }, { @@ -5419,28 +5499,28 @@ } ], "source": [ - "#water risk\n", + "# water risk\n", "water_risk_mean_list = []\n", "water_risk_median_list = []\n", "water_risk_std_list = []\n", "water_risk_max_list = []\n", "water_risk_min_list = []\n", "\n", - "#deforestation risk\n", + "# deforestation risk\n", "deforestation_risk_mean_list = []\n", "deforestation_risk_median_list = []\n", "deforestation_risk_std_list = []\n", "deforestation_risk_max_list = []\n", "deforestation_risk_min_list = []\n", "\n", - "#carbon risk\n", + "# carbon risk\n", "carbon_risk_mean_list = []\n", "carbon_risk_median_list = []\n", "carbon_risk_std_list = []\n", "carbon_risk_max_list = []\n", "carbon_risk_min_list = []\n", "\n", - "#biodiversity risk\n", + "# biodiversity risk\n", "biodiversity_risk_mean_list = []\n", "biodiversity_risk_median_list = []\n", "biodiversity_risk_std_list = []\n", @@ -5449,82 +5529,80 @@ "\n", "start_time = time.time()\n", "for i, row in supply_data.iterrows():\n", - " material = row['Material']\n", - " geom = row['geometry']\n", + " material = row[\"Material\"]\n", + " geom = row[\"geometry\"]\n", "\n", - " if material == 'Rubber':\n", - " #calculate risk for water\n", - " water_stats = zonal_stats(geom, rubber_water_risk,\n", - " stats=\"mean median std max min\")\n", - " #calculate risk for deforestation\n", - " deforestation_stats = zonal_stats(geom, rubber_deforestation_risk,\n", - " stats=\"mean median std max min\")\n", - " #calculate risk for carbon\n", - " carbon_stats = zonal_stats(geom, rubber_carbon_risk,\n", - " stats=\"mean median std max min\")\n", - " #calculate risk for biodiversity\n", - " biodiversity_stats = zonal_stats(geom, rubber_biodiversity_risk,\n", - " stats=\"mean median std max min\")\n", - " \n", - " if material == 'Cotton':\n", - " #calculate risk for water\n", - " water_stats = zonal_stats(geom, cotton_water_risk,\n", - " stats=\"mean median std max min\")\n", - " #calculate risk for deforestation\n", - " deforestation_stats = zonal_stats(geom, cotton_deforestation_risk,\n", - " stats=\"mean median std max min\")\n", - " #calculate risk for carbon\n", - " carbon_stats = zonal_stats(geom, cotton_carbon_risk,\n", - " stats=\"mean median std max min\")\n", - " #calculate risk for biodiversity\n", - " biodiversity_stats = zonal_stats(geom, cotton_biodiversity_risk,\n", - " stats=\"mean median std max min\")\n", - " \n", - " \n", - " if material == 'Leather':\n", - " #calculate risk for water \n", - " water_stats = zonal_stats(geom, pasture_water_risk,\n", - " stats=\"mean median std max min\")\n", - " #calculate risk for deforestation\n", - " deforestation_stats = zonal_stats(geom, pasture_deforestation_risk,\n", - " stats=\"mean median std max min\")\n", - " #calculate risk for carbon\n", - " carbon_stats = zonal_stats(geom, pasture_carbon_risk,\n", - " stats=\"mean median std max min\")\n", - " #calculate risk for biodiversity\n", - " biodiversity_stats = zonal_stats(geom, pasture_biodiversity_risk,\n", - " stats=\"mean median std max min\")\n", + " if material == \"Rubber\":\n", + " # calculate risk for water\n", + " water_stats = zonal_stats(geom, rubber_water_risk, stats=\"mean median std max min\")\n", + " # calculate risk for deforestation\n", + " deforestation_stats = zonal_stats(\n", + " geom, rubber_deforestation_risk, stats=\"mean median std max min\"\n", + " )\n", + " # calculate risk for carbon\n", + " carbon_stats = zonal_stats(geom, 
rubber_carbon_risk, stats=\"mean median std max min\")\n", + " # calculate risk for biodiversity\n", + " biodiversity_stats = zonal_stats(\n", + " geom, rubber_biodiversity_risk, stats=\"mean median std max min\"\n", + " )\n", + "\n", + " if material == \"Cotton\":\n", + " # calculate risk for water\n", + " water_stats = zonal_stats(geom, cotton_water_risk, stats=\"mean median std max min\")\n", + " # calculate risk for deforestation\n", + " deforestation_stats = zonal_stats(\n", + " geom, cotton_deforestation_risk, stats=\"mean median std max min\"\n", + " )\n", + " # calculate risk for carbon\n", + " carbon_stats = zonal_stats(geom, cotton_carbon_risk, stats=\"mean median std max min\")\n", + " # calculate risk for biodiversity\n", + " biodiversity_stats = zonal_stats(\n", + " geom, cotton_biodiversity_risk, stats=\"mean median std max min\"\n", + " )\n", + "\n", + " if material == \"Leather\":\n", + " # calculate risk for water\n", + " water_stats = zonal_stats(geom, pasture_water_risk, stats=\"mean median std max min\")\n", + " # calculate risk for deforestation\n", + " deforestation_stats = zonal_stats(\n", + " geom, pasture_deforestation_risk, stats=\"mean median std max min\"\n", + " )\n", + " # calculate risk for carbon\n", + " carbon_stats = zonal_stats(geom, pasture_carbon_risk, stats=\"mean median std max min\")\n", + " # calculate risk for biodiversity\n", + " biodiversity_stats = zonal_stats(\n", + " geom, pasture_biodiversity_risk, stats=\"mean median std max min\"\n", + " )\n", "\n", - " \n", " ##APPEND RISK\n", - " #water risk\n", - " water_risk_mean_list.append(water_stats[0]['mean'])\n", - " water_risk_median_list.append(water_stats[0]['median'])\n", - " water_risk_std_list.append(water_stats[0]['std'])\n", - " water_risk_max_list.append(water_stats[0]['max'])\n", - " water_risk_min_list.append(water_stats[0]['min'])\n", - " \n", - " #deforestation risk\n", - " deforestation_risk_mean_list.append(deforestation_stats[0]['mean'])\n", - " deforestation_risk_median_list.append(deforestation_stats[0]['median'])\n", - " deforestation_risk_std_list.append(deforestation_stats[0]['std'])\n", - " deforestation_risk_max_list.append(deforestation_stats[0]['max'])\n", - " deforestation_risk_min_list.append(deforestation_stats[0]['min'])\n", + " # water risk\n", + " water_risk_mean_list.append(water_stats[0][\"mean\"])\n", + " water_risk_median_list.append(water_stats[0][\"median\"])\n", + " water_risk_std_list.append(water_stats[0][\"std\"])\n", + " water_risk_max_list.append(water_stats[0][\"max\"])\n", + " water_risk_min_list.append(water_stats[0][\"min\"])\n", "\n", - " #carbon risk\n", - " carbon_risk_mean_list.append(carbon_stats[0]['mean'])\n", - " carbon_risk_median_list.append(carbon_stats[0]['median'])\n", - " carbon_risk_std_list.append(carbon_stats[0]['std'])\n", - " carbon_risk_max_list.append(carbon_stats[0]['max'])\n", - " carbon_risk_min_list.append(carbon_stats[0]['min'])\n", + " # deforestation risk\n", + " deforestation_risk_mean_list.append(deforestation_stats[0][\"mean\"])\n", + " deforestation_risk_median_list.append(deforestation_stats[0][\"median\"])\n", + " deforestation_risk_std_list.append(deforestation_stats[0][\"std\"])\n", + " deforestation_risk_max_list.append(deforestation_stats[0][\"max\"])\n", + " deforestation_risk_min_list.append(deforestation_stats[0][\"min\"])\n", + "\n", + " # carbon risk\n", + " carbon_risk_mean_list.append(carbon_stats[0][\"mean\"])\n", + " carbon_risk_median_list.append(carbon_stats[0][\"median\"])\n", + " 
carbon_risk_std_list.append(carbon_stats[0][\"std\"])\n", + " carbon_risk_max_list.append(carbon_stats[0][\"max\"])\n", + " carbon_risk_min_list.append(carbon_stats[0][\"min\"])\n", + "\n", + " # biodiversity risk\n", + " biodiversity_risk_mean_list.append(biodiversity_stats[0][\"mean\"])\n", + " biodiversity_risk_median_list.append(biodiversity_stats[0][\"median\"])\n", + " biodiversity_risk_std_list.append(biodiversity_stats[0][\"std\"])\n", + " biodiversity_risk_max_list.append(biodiversity_stats[0][\"max\"])\n", + " biodiversity_risk_min_list.append(biodiversity_stats[0][\"min\"])\n", "\n", - " #biodiversity risk\n", - " biodiversity_risk_mean_list.append(biodiversity_stats[0]['mean'])\n", - " biodiversity_risk_median_list.append(biodiversity_stats[0]['median'])\n", - " biodiversity_risk_std_list.append(biodiversity_stats[0]['std'])\n", - " biodiversity_risk_max_list.append(biodiversity_stats[0]['max'])\n", - " biodiversity_risk_min_list.append(biodiversity_stats[0]['min'])\n", - " \n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -5562,27 +5640,27 @@ } ], "source": [ - "print(f'Len of supply data:{len(supply_data)}')\n", - "print(f'water Len of stats: {len(water_risk_mean_list)}')\n", - "print(f'water Len of stats: {len(water_risk_median_list)}')\n", - "print(f'water Len of stats: {len(water_risk_std_list)}')\n", - "print(f'water Len of stats: {len(water_risk_min_list)}')\n", - "print(f'water Len of stats: {len(water_risk_max_list)}')\n", - "print(f'deforestation Len of stats: {len(deforestation_risk_mean_list)}')\n", - "print(f'deforestation Len of stats: {len(deforestation_risk_median_list)}')\n", - "print(f'deforestation Len of stats: {len(deforestation_risk_std_list)}')\n", - "print(f'deforestation Len of stats: {len(deforestation_risk_min_list)}')\n", - "print(f'deforestation Len of stats: {len(deforestation_risk_max_list)}')\n", - "print(f'carbon Len of stats: {len(carbon_risk_mean_list)}')\n", - "print(f'carbon Len of stats: {len(carbon_risk_median_list)}')\n", - "print(f'carbon Len of stats: {len(carbon_risk_std_list)}')\n", - "print(f'carbon Len of stats: {len(carbon_risk_min_list)}')\n", - "print(f'carbon Len of stats: {len(carbon_risk_max_list)}')\n", - "print(f'biodiversity Len of stats: {len(biodiversity_risk_mean_list)}')\n", - "print(f'biodiversity Len of stats: {len(biodiversity_risk_median_list)}')\n", - "print(f'biodiversity Len of stats: {len(biodiversity_risk_std_list)}')\n", - "print(f'biodiversity Len of stats: {len(biodiversity_risk_min_list)}')\n", - "print(f'biodiversity Len of stats: {len(biodiversity_risk_max_list)}')\n" + "print(f\"Len of supply data:{len(supply_data)}\")\n", + "print(f\"water Len of stats: {len(water_risk_mean_list)}\")\n", + "print(f\"water Len of stats: {len(water_risk_median_list)}\")\n", + "print(f\"water Len of stats: {len(water_risk_std_list)}\")\n", + "print(f\"water Len of stats: {len(water_risk_min_list)}\")\n", + "print(f\"water Len of stats: {len(water_risk_max_list)}\")\n", + "print(f\"deforestation Len of stats: {len(deforestation_risk_mean_list)}\")\n", + "print(f\"deforestation Len of stats: {len(deforestation_risk_median_list)}\")\n", + "print(f\"deforestation Len of stats: {len(deforestation_risk_std_list)}\")\n", + "print(f\"deforestation Len of stats: {len(deforestation_risk_min_list)}\")\n", + "print(f\"deforestation Len of stats: {len(deforestation_risk_max_list)}\")\n", + "print(f\"carbon Len of stats: {len(carbon_risk_mean_list)}\")\n", + "print(f\"carbon Len of stats: 
{len(carbon_risk_median_list)}\")\n", + "print(f\"carbon Len of stats: {len(carbon_risk_std_list)}\")\n", + "print(f\"carbon Len of stats: {len(carbon_risk_min_list)}\")\n", + "print(f\"carbon Len of stats: {len(carbon_risk_max_list)}\")\n", + "print(f\"biodiversity Len of stats: {len(biodiversity_risk_mean_list)}\")\n", + "print(f\"biodiversity Len of stats: {len(biodiversity_risk_median_list)}\")\n", + "print(f\"biodiversity Len of stats: {len(biodiversity_risk_std_list)}\")\n", + "print(f\"biodiversity Len of stats: {len(biodiversity_risk_min_list)}\")\n", + "print(f\"biodiversity Len of stats: {len(biodiversity_risk_max_list)}\")" ] }, { @@ -5592,26 +5670,26 @@ "outputs": [], "source": [ "## apend to supply dataframe\n", - "supply_data['wr_mean'] = water_risk_mean_list\n", - "supply_data['wr_median'] = water_risk_median_list\n", - "supply_data['wr_std'] = water_risk_std_list\n", - "supply_data['wr_max'] = water_risk_max_list\n", - "supply_data['wr_min'] = water_risk_min_list\n", - "supply_data['df_mean'] = deforestation_risk_mean_list\n", - "supply_data['df_median'] = deforestation_risk_median_list\n", - "supply_data['df_std'] = deforestation_risk_std_list\n", - "supply_data['df_min'] = deforestation_risk_min_list\n", - "supply_data['df_max'] = deforestation_risk_max_list\n", - "supply_data['cr_mean'] = carbon_risk_mean_list\n", - "supply_data['cr_median'] = carbon_risk_median_list\n", - "supply_data['cr_std'] = carbon_risk_std_list\n", - "supply_data['cr_min'] = carbon_risk_min_list\n", - "supply_data['cr_max'] = carbon_risk_max_list\n", - "supply_data['bio_mean'] = biodiversity_risk_mean_list\n", - "supply_data['bio_median'] = biodiversity_risk_median_list\n", - "supply_data['bio_std'] = biodiversity_risk_std_list\n", - "supply_data['bio_min'] = biodiversity_risk_min_list\n", - "supply_data['bio_max'] = biodiversity_risk_max_list" + "supply_data[\"wr_mean\"] = water_risk_mean_list\n", + "supply_data[\"wr_median\"] = water_risk_median_list\n", + "supply_data[\"wr_std\"] = water_risk_std_list\n", + "supply_data[\"wr_max\"] = water_risk_max_list\n", + "supply_data[\"wr_min\"] = water_risk_min_list\n", + "supply_data[\"df_mean\"] = deforestation_risk_mean_list\n", + "supply_data[\"df_median\"] = deforestation_risk_median_list\n", + "supply_data[\"df_std\"] = deforestation_risk_std_list\n", + "supply_data[\"df_min\"] = deforestation_risk_min_list\n", + "supply_data[\"df_max\"] = deforestation_risk_max_list\n", + "supply_data[\"cr_mean\"] = carbon_risk_mean_list\n", + "supply_data[\"cr_median\"] = carbon_risk_median_list\n", + "supply_data[\"cr_std\"] = carbon_risk_std_list\n", + "supply_data[\"cr_min\"] = carbon_risk_min_list\n", + "supply_data[\"cr_max\"] = carbon_risk_max_list\n", + "supply_data[\"bio_mean\"] = biodiversity_risk_mean_list\n", + "supply_data[\"bio_median\"] = biodiversity_risk_median_list\n", + "supply_data[\"bio_std\"] = biodiversity_risk_std_list\n", + "supply_data[\"bio_min\"] = biodiversity_risk_min_list\n", + "supply_data[\"bio_max\"] = biodiversity_risk_max_list" ] }, { @@ -5836,7 +5914,10 @@ "metadata": {}, "outputs": [], "source": [ - "supply_data.to_file('../../datasets/processed/user_data/located_lg_data_polygon_v2_stats.shp', driver='ESRI Shapefile',)" + "supply_data.to_file(\n", + " \"../../datasets/processed/user_data/located_lg_data_polygon_v2_stats.shp\",\n", + " driver=\"ESRI Shapefile\",\n", + ")" ] }, { diff --git a/data/notebooks/Lab/6_analysis_exploration.ipynb b/data/notebooks/Lab/6_analysis_exploration.ipynb index a5d242009..fc40b6ba7 100644 --- 
a/data/notebooks/Lab/6_analysis_exploration.ipynb +++ b/data/notebooks/Lab/6_analysis_exploration.ipynb @@ -358,26 +358,23 @@ ], "source": [ "# import libraries\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "from geopandas.tools import sjoin\n", - "\n", - "\n", "# Data\n", "from collections import Counter\n", "from math import pi\n", - "from bokeh.palettes import BuGn\n", - "from bokeh.transform import cumsum\n", "\n", + "import geopandas as gpd\n", + "import pandas as pd\n", "import pandas_bokeh\n", - "from bokeh.plotting import figure\n", - "from bokeh.io import output_notebook, show\n", + "from bokeh.io import show\n", "from bokeh.models import ColumnDataSource\n", + "\n", "# Create Bokeh-Table with DataFrame:\n", "from bokeh.models.widgets import DataTable, TableColumn\n", - "from bokeh.transform import dodge\n", - "from bokeh.transform import factor_cmap\n", - "from bokeh.palettes import Spectral6, Spectral10\n", + "from bokeh.palettes import BuGn, Spectral10\n", + "from bokeh.plotting import figure\n", + "from bokeh.transform import cumsum, dodge\n", + "from geopandas.tools import sjoin\n", + "\n", "pandas_bokeh.output_notebook()" ] }, @@ -622,7 +619,9 @@ } ], "source": [ - "mock_data = gpd.read_file('../../datasets/processed/processed_data/located_lg_data_polygon_v2_stats.shp')\n", + "mock_data = gpd.read_file(\n", + " \"../../datasets/processed/processed_data/located_lg_data_polygon_v2_stats.shp\"\n", + ")\n", "mock_data.head()" ] }, @@ -843,40 +842,40 @@ "## calulate impact\n", "\n", "\n", - "#calculate water risk impact\n", - "wr_impact = [row['Volume']*row['wr_mean'] for i,row in mock_data.iterrows()]\n", - "wr_impact_min = [row['Volume']*row['wr_min'] for i,row in mock_data.iterrows()]\n", - "wr_impact_max = [row['Volume']*row['wr_max'] for i,row in mock_data.iterrows()]\n", + "# calculate water risk impact\n", + "wr_impact = [row[\"Volume\"] * row[\"wr_mean\"] for i, row in mock_data.iterrows()]\n", + "wr_impact_min = [row[\"Volume\"] * row[\"wr_min\"] for i, row in mock_data.iterrows()]\n", + "wr_impact_max = [row[\"Volume\"] * row[\"wr_max\"] for i, row in mock_data.iterrows()]\n", "\n", - "#calculate deforestation impact\n", - "df_impact = [row['Volume']*row['df_mean'] for i,row in mock_data.iterrows()]\n", - "df_impact_min = [row['Volume']*row['df_min'] for i,row in mock_data.iterrows()]\n", - "df_impact_max = [row['Volume']*row['df_max'] for i,row in mock_data.iterrows()]\n", + "# calculate deforestation impact\n", + "df_impact = [row[\"Volume\"] * row[\"df_mean\"] for i, row in mock_data.iterrows()]\n", + "df_impact_min = [row[\"Volume\"] * row[\"df_min\"] for i, row in mock_data.iterrows()]\n", + "df_impact_max = [row[\"Volume\"] * row[\"df_max\"] for i, row in mock_data.iterrows()]\n", "\n", - "#calculate carbon impacts\n", - "cr_impact = [row['Volume']*row['cr_mean'] for i,row in mock_data.iterrows()]\n", - "cr_impact_min = [row['Volume']*row['cr_min'] for i,row in mock_data.iterrows()]\n", - "cr_impact_max = [row['Volume']*row['cr_max'] for i,row in mock_data.iterrows()]\n", + "# calculate carbon impacts\n", + "cr_impact = [row[\"Volume\"] * row[\"cr_mean\"] for i, row in mock_data.iterrows()]\n", + "cr_impact_min = [row[\"Volume\"] * row[\"cr_min\"] for i, row in mock_data.iterrows()]\n", + "cr_impact_max = [row[\"Volume\"] * row[\"cr_max\"] for i, row in mock_data.iterrows()]\n", "\n", - "#calculate biodiversity impacts\n", - "bio_impacts = [row['Volume']*row['bio_mean'] for i,row in mock_data.iterrows()]\n", - "bio_impacts_min = 
[row['Volume']*row['bio_min'] for i,row in mock_data.iterrows()]\n", - "bio_impacts_max = [row['Volume']*row['bio_max'] for i,row in mock_data.iterrows()]\n", + "# calculate biodiversity impacts\n", + "bio_impacts = [row[\"Volume\"] * row[\"bio_mean\"] for i, row in mock_data.iterrows()]\n", + "bio_impacts_min = [row[\"Volume\"] * row[\"bio_min\"] for i, row in mock_data.iterrows()]\n", + "bio_impacts_max = [row[\"Volume\"] * row[\"bio_max\"] for i, row in mock_data.iterrows()]\n", "\n", "\n", "##append to dataframe\n", - "mock_data['wr_imp']=wr_impact\n", - "mock_data['wr_imp_min']=wr_impact_min\n", - "mock_data['wr_imp_max']=wr_impact_max\n", - "mock_data['df_imp']=df_impact\n", - "mock_data['df_imp_min']=df_impact_min\n", - "mock_data['df_imp_max']=df_impact_max\n", - "mock_data['cr_imp']=cr_impact\n", - "mock_data['cr_imp_min']=cr_impact_min\n", - "mock_data['cr_imp_max']=cr_impact_max\n", - "mock_data['bio_imp']=bio_impacts\n", - "mock_data['bio_imp_min']=bio_impacts_min\n", - "mock_data['bio_imp_max']=bio_impacts_max\n", + "mock_data[\"wr_imp\"] = wr_impact\n", + "mock_data[\"wr_imp_min\"] = wr_impact_min\n", + "mock_data[\"wr_imp_max\"] = wr_impact_max\n", + "mock_data[\"df_imp\"] = df_impact\n", + "mock_data[\"df_imp_min\"] = df_impact_min\n", + "mock_data[\"df_imp_max\"] = df_impact_max\n", + "mock_data[\"cr_imp\"] = cr_impact\n", + "mock_data[\"cr_imp_min\"] = cr_impact_min\n", + "mock_data[\"cr_imp_max\"] = cr_impact_max\n", + "mock_data[\"bio_imp\"] = bio_impacts\n", + "mock_data[\"bio_imp_min\"] = bio_impacts_min\n", + "mock_data[\"bio_imp_max\"] = bio_impacts_max\n", "\n", "\n", "mock_data.head()" @@ -898,9 +897,12 @@ } ], "source": [ - "#export dataframe\n", + "# export dataframe\n", "\n", - "mock_data.to_file('../../datasets/processed/processed_data/located_lg_data_polygon_v2_stats_impacts.shp',driver='ESRI Shapefile')" + "mock_data.to_file(\n", + " \"../../datasets/processed/processed_data/located_lg_data_polygon_v2_stats_impacts.shp\",\n", + " driver=\"ESRI Shapefile\",\n", + ")" ] }, { @@ -1066,7 +1068,7 @@ } ], "source": [ - "continents = gpd.read_file('../../datasets/raw/input_data_test/continents.shp')\n", + "continents = gpd.read_file(\"../../datasets/raw/input_data_test/continents.shp\")\n", "continents" ] }, @@ -1510,10 +1512,12 @@ } ], "source": [ - "#group by country\n", - "countries_volume_df = join_df.groupby('Country').sum()\n", + "# group by country\n", + "countries_volume_df = join_df.groupby(\"Country\").sum()\n", "##add geometry\n", - "country_geoms = countries_volume_df.merge(country_geoms, right_on='Country',left_on='Country', how='inner').drop_duplicates()\n", + "country_geoms = countries_volume_df.merge(\n", + " country_geoms, right_on=\"Country\", left_on=\"Country\", how=\"inner\"\n", + ").drop_duplicates()\n", "\n", "country_geoms.head()" ] @@ -1739,7 +1743,7 @@ } ], "source": [ - "country_geoms = country_geoms.drop_duplicates(subset=['Country'])\n", + "country_geoms = country_geoms.drop_duplicates(subset=[\"Country\"])\n", "country_geoms.head()" ] }, @@ -1964,11 +1968,13 @@ } ], "source": [ - "#group by continent\n", - "#group by country\n", - "continents_volume_df = join_df.groupby('CONTINENT').sum()\n", - "#add geometry\n", - "continents_geom = continents[['CONTINENT','geometry']].merge(continents_volume_df, right_on='CONTINENT',left_on='CONTINENT', how='inner' )\n", + "# group by continent\n", + "# group by country\n", + "continents_volume_df = join_df.groupby(\"CONTINENT\").sum()\n", + "# add geometry\n", + "continents_geom = 
continents[[\"CONTINENT\", \"geometry\"]].merge(\n", + " continents_volume_df, right_on=\"CONTINENT\", left_on=\"CONTINENT\", how=\"inner\"\n", + ")\n", "\n", "continents_geom.head()" ] @@ -1991,9 +1997,13 @@ } ], "source": [ - "#export for visualizatuon in qgis\n", - "country_geoms.to_file('../../datasets/processed/processed_data/test_vis/country_sum.shp', driver='ESRI Shapefile')\n", - "continents_geom.to_file('../../datasets/processed/processed_data/test_vis/continents_sum.shp', driver='ESRI Shapefile')" + "# export for visualizatuon in qgis\n", + "country_geoms.to_file(\n", + " \"../../datasets/processed/processed_data/test_vis/country_sum.shp\", driver=\"ESRI Shapefile\"\n", + ")\n", + "continents_geom.to_file(\n", + " \"../../datasets/processed/processed_data/test_vis/continents_sum.shp\", driver=\"ESRI Shapefile\"\n", + ")" ] }, { @@ -2217,7 +2227,7 @@ } ], "source": [ - "country_geoms = country_geoms.sort_values('Volume', ascending=True)\n", + "country_geoms = country_geoms.sort_values(\"Volume\", ascending=True)\n", "country_geoms.head()" ] }, @@ -2311,44 +2321,63 @@ } ], "source": [ - "country = list(country_geoms['Country'])\n", - "volume = list(country_geoms['Volume'])\n", + "country = list(country_geoms[\"Country\"])\n", + "volume = list(country_geoms[\"Volume\"])\n", "\n", - "#represent top countries by volume\n", - "data = {'country': country,\n", - " 'volume': volume}\n", + "# represent top countries by volume\n", + "data = {\"country\": country, \"volume\": volume}\n", "\n", "source = ColumnDataSource(data)\n", "\n", - "p = figure(y_range=country, x_range=(0, 26800), plot_width=250, title=\"Top countries by volume (Tonnes)\",\n", - " toolbar_location=None, tools=\"\")\n", + "p = figure(\n", + " y_range=country,\n", + " x_range=(0, 26800),\n", + " plot_width=250,\n", + " title=\"Top countries by volume (Tonnes)\",\n", + " toolbar_location=None,\n", + " tools=\"\",\n", + ")\n", "\n", - "p.hbar(y=dodge('country', 0, range=p.y_range), right='volume', height=0.2, source=source,\n", - " color=\"#c9d9d3\")\n", + "p.hbar(\n", + " y=dodge(\"country\", 0, range=p.y_range),\n", + " right=\"volume\",\n", + " height=0.2,\n", + " source=source,\n", + " color=\"#c9d9d3\",\n", + ")\n", "\n", "p.y_range.range_padding = 0.1\n", "p.ygrid.grid_line_color = None\n", "\n", - "#represent top countries by percentage\n", - "volume_pct = [round((val*100)/sum(volume),2) for val in volume]\n", + "# represent top countries by percentage\n", + "volume_pct = [round((val * 100) / sum(volume), 2) for val in volume]\n", "\n", - "data_pct = {'country': country,\n", - " 'volume_pct': volume_pct}\n", + "data_pct = {\"country\": country, \"volume_pct\": volume_pct}\n", "\n", "source_pct = ColumnDataSource(data_pct)\n", "\n", - "p_pct = figure(y_range=country, x_range=(0, 100), plot_width=250, title=\"Top countries by volume (%)\",\n", - " toolbar_location=None, tools=\"\")\n", + "p_pct = figure(\n", + " y_range=country,\n", + " x_range=(0, 100),\n", + " plot_width=250,\n", + " title=\"Top countries by volume (%)\",\n", + " toolbar_location=None,\n", + " tools=\"\",\n", + ")\n", "\n", - "p_pct.hbar(y=dodge('country', 0, range=p.y_range), right='volume_pct', height=0.2, source=source_pct,\n", - " color=\"#c9d9d3\")\n", + "p_pct.hbar(\n", + " y=dodge(\"country\", 0, range=p.y_range),\n", + " right=\"volume_pct\",\n", + " height=0.2,\n", + " source=source_pct,\n", + " color=\"#c9d9d3\",\n", + ")\n", "\n", "p_pct.y_range.range_padding = 0.1\n", "p_pct.ygrid.grid_line_color = None\n", "\n", "\n", - "\n", - "#Make 
Dashboard with Grid Layout:\n", + "# Make Dashboard with Grid Layout:\n", "pandas_bokeh.plot_grid([[p, p_pct]], plot_width=450)" ] }, @@ -2450,18 +2479,18 @@ } ], "source": [ - "#group by materials\n", - "#group by material\n", - "risk_material = mock_data.groupby('Material').sum()\n", + "# group by materials\n", + "# group by material\n", + "risk_material = mock_data.groupby(\"Material\").sum()\n", "\n", "## volume dataframe\n", "materials = list(risk_material.index)\n", - "volumens = list(risk_material['Volume'])\n", + "volumens = list(risk_material[\"Volume\"])\n", "df = gpd.GeoDataFrame()\n", - "df['materials']=materials\n", - "df['volume']=volumens# Create Bokeh-Table with DataFrame:\n", - "from bokeh.models.widgets import DataTable, TableColumn\n", + "df[\"materials\"] = materials\n", + "df[\"volume\"] = volumens # Create Bokeh-Table with DataFrame:\n", "from bokeh.models import ColumnDataSource\n", + "from bokeh.models.widgets import DataTable, TableColumn\n", "\n", "data_table = DataTable(\n", " columns=[TableColumn(field=Ci, title=Ci) for Ci in df.columns],\n", @@ -2469,13 +2498,13 @@ " height=300,\n", ")\n", "\n", - "p_bar = risk_material[['Volume']].plot_bokeh(\n", - " kind='bar',\n", + "p_bar = risk_material[[\"Volume\"]].plot_bokeh(\n", + " kind=\"bar\",\n", " title=\"Total volume purchased by Material\",\n", " show_figure=False,\n", - ") \n", + ")\n", "\n", - "#Combine Table and Scatterplot via grid layout:\n", + "# Combine Table and Scatterplot via grid layout:\n", "pandas_bokeh.plot_grid([[data_table, p_bar]], plot_width=300, plot_height=350)" ] }, @@ -2548,27 +2577,41 @@ } ], "source": [ + "x = Counter({\"Cotton\": 48745, \"Rubber\": 19510, \"Leather\": 8155})\n", "\n", - "\n", - "x = Counter({\n", - " 'Cotton': 48745, 'Rubber': 19510, 'Leather': 8155\n", - "})\n", - "\n", - "data = pd.DataFrame.from_dict(dict(x), orient='index').reset_index().rename(index=str, columns={0:'value', 'index':'Commodity'})\n", - "data['angle'] = data['value']/sum(x.values()) * 2*pi\n", - "data['color'] = BuGn[len(x)]\n", + "data = (\n", + " pd.DataFrame.from_dict(dict(x), orient=\"index\")\n", + " .reset_index()\n", + " .rename(index=str, columns={0: \"value\", \"index\": \"Commodity\"})\n", + ")\n", + "data[\"angle\"] = data[\"value\"] / sum(x.values()) * 2 * pi\n", + "data[\"color\"] = BuGn[len(x)]\n", "\n", "# Plotting code\n", "\n", - "p = figure(plot_height=350, title=\"Purchased volume by material (tonnes)\", toolbar_location=None,\n", - " tools=\"hover\", tooltips=[(\"Commodity\", \"@Commodity\"),(\"Value\", \"@value\")])\n", + "p = figure(\n", + " plot_height=350,\n", + " title=\"Purchased volume by material (tonnes)\",\n", + " toolbar_location=None,\n", + " tools=\"hover\",\n", + " tooltips=[(\"Commodity\", \"@Commodity\"), (\"Value\", \"@value\")],\n", + ")\n", "\n", - "p.annular_wedge(x=0, y=1, inner_radius=0.2, outer_radius=0.4,\n", - " start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),\n", - " line_color=\"white\", fill_color='color', legend='Commodity', source=data)\n", + "p.annular_wedge(\n", + " x=0,\n", + " y=1,\n", + " inner_radius=0.2,\n", + " outer_radius=0.4,\n", + " start_angle=cumsum(\"angle\", include_zero=True),\n", + " end_angle=cumsum(\"angle\"),\n", + " line_color=\"white\",\n", + " fill_color=\"color\",\n", + " legend=\"Commodity\",\n", + " source=data,\n", + ")\n", "\n", - "p.axis.axis_label=None\n", - "p.axis.visible=False\n", + "p.axis.axis_label = None\n", + "p.axis.visible = False\n", "p.grid.grid_line_color = None\n", "\n", "show(p)" 
@@ -2812,8 +2855,8 @@ } ], "source": [ - "#group by material and country\n", - "group_m_c = mock_data.groupby(['Country','Material']).sum()[['Volume']].sort_values('Volume')\n", + "# group by material and country\n", + "group_m_c = mock_data.groupby([\"Country\", \"Material\"]).sum()[[\"Volume\"]].sort_values(\"Volume\")\n", "group_m_c" ] }, @@ -2886,29 +2929,139 @@ } ], "source": [ - "#countries = list(set(sorted_.index.get_level_values(0)))\n", - "commodities = ['Cotton', 'Rubber', 'Leather']\n", + "# countries = list(set(sorted_.index.get_level_values(0)))\n", + "commodities = [\"Cotton\", \"Rubber\", \"Leather\"]\n", "\n", "data = {\n", - " 'countries':['Argentina','Australia','Bangladesh','Brazil','Burundi','Canada','China',\"Cote d'Ivoire\",'Greece','India','Indonesia','Italy','Japan','Korea','Liberia','Malaysia','Thailand','Turkey','United States','United states','Uzbekistan','Vietnam'],\n", - " 'cotton':[0,5900,1400,1600, 0, 0,22600,0,3300,2545,0,0,0,0,0,0,1200,0,7000,600, 2600],\n", - " 'rubber':[0,0,0,0,0,0,2400,1100,0,1690,2600,0,730,0,2300,2040,4840,0,1000,0,810],\n", - " 'leather':[140,2740,0,480,680,125,1800,0,0,0,0,790,0,160,0,0,180,0,800,0,260]\n", + " \"countries\": [\n", + " \"Argentina\",\n", + " \"Australia\",\n", + " \"Bangladesh\",\n", + " \"Brazil\",\n", + " \"Burundi\",\n", + " \"Canada\",\n", + " \"China\",\n", + " \"Cote d'Ivoire\",\n", + " \"Greece\",\n", + " \"India\",\n", + " \"Indonesia\",\n", + " \"Italy\",\n", + " \"Japan\",\n", + " \"Korea\",\n", + " \"Liberia\",\n", + " \"Malaysia\",\n", + " \"Thailand\",\n", + " \"Turkey\",\n", + " \"United States\",\n", + " \"United states\",\n", + " \"Uzbekistan\",\n", + " \"Vietnam\",\n", + " ],\n", + " \"cotton\": [\n", + " 0,\n", + " 5900,\n", + " 1400,\n", + " 1600,\n", + " 0,\n", + " 0,\n", + " 22600,\n", + " 0,\n", + " 3300,\n", + " 2545,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 1200,\n", + " 0,\n", + " 7000,\n", + " 600,\n", + " 2600,\n", + " ],\n", + " \"rubber\": [\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 2400,\n", + " 1100,\n", + " 0,\n", + " 1690,\n", + " 2600,\n", + " 0,\n", + " 730,\n", + " 0,\n", + " 2300,\n", + " 2040,\n", + " 4840,\n", + " 0,\n", + " 1000,\n", + " 0,\n", + " 810,\n", + " ],\n", + " \"leather\": [\n", + " 140,\n", + " 2740,\n", + " 0,\n", + " 480,\n", + " 680,\n", + " 125,\n", + " 1800,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 0,\n", + " 790,\n", + " 0,\n", + " 160,\n", + " 0,\n", + " 0,\n", + " 180,\n", + " 0,\n", + " 800,\n", + " 0,\n", + " 260,\n", + " ],\n", "}\n", "\n", "source = ColumnDataSource(data=data)\n", "\n", - "p = figure(y_range=countries, x_range=(0, 22600), plot_width=400, title=\"Commodities bought by Country\",\n", - " toolbar_location=None, tools=\"\")\n", + "p = figure(\n", + " y_range=countries,\n", + " x_range=(0, 22600),\n", + " plot_width=400,\n", + " title=\"Commodities bought by Country\",\n", + " toolbar_location=None,\n", + " tools=\"\",\n", + ")\n", "\n", - "p.hbar(y=dodge('countries', -0.25, range=p.y_range), right='cotton', height=0.2, source=source,\n", - " color=\"#c9d9d3\")\n", + "p.hbar(\n", + " y=dodge(\"countries\", -0.25, range=p.y_range),\n", + " right=\"cotton\",\n", + " height=0.2,\n", + " source=source,\n", + " color=\"#c9d9d3\",\n", + ")\n", "\n", - "p.hbar(y=dodge('countries', 0.0, range=p.y_range), right='rubber', height=0.2, source=source,\n", - " color=\"#718dbf\")\n", + "p.hbar(\n", + " y=dodge(\"countries\", 0.0, range=p.y_range),\n", + " right=\"rubber\",\n", + " 
height=0.2,\n", + " source=source,\n", + " color=\"#718dbf\",\n", + ")\n", "\n", - "p.hbar(y=dodge('countries', 0.25, range=p.y_range), right='leather', height=0.2, source=source,\n", - " color=\"#e84d60\")\n", + "p.hbar(\n", + " y=dodge(\"countries\", 0.25, range=p.y_range),\n", + " right=\"leather\",\n", + " height=0.2,\n", + " source=source,\n", + " color=\"#e84d60\",\n", + ")\n", "\n", "p.y_range.range_padding = 0.1\n", "p.ygrid.grid_line_color = None\n", @@ -3014,16 +3167,16 @@ } ], "source": [ - "#group by materials\n", - "#group by material\n", - "risk_lt = mock_data.groupby('Location t').sum()\n", + "# group by materials\n", + "# group by material\n", + "risk_lt = mock_data.groupby(\"Location t\").sum()\n", "\n", "## volume dataframe\n", "lt = list(risk_lt.index)\n", - "volumens = list(risk_lt['Volume'])\n", + "volumens = list(risk_lt[\"Volume\"])\n", "df = gpd.GeoDataFrame()\n", - "df['location']=lt\n", - "df['volume']=volumens# Create Bokeh-Table with DataFrame:\n", + "df[\"location\"] = lt\n", + "df[\"volume\"] = volumens # Create Bokeh-Table with DataFrame:\n", "\n", "\n", "data_table = DataTable(\n", @@ -3032,13 +3185,13 @@ " height=300,\n", ")\n", "\n", - "p_bar = risk_material[['Volume']].plot_bokeh(\n", - " kind='bar',\n", + "p_bar = risk_material[[\"Volume\"]].plot_bokeh(\n", + " kind=\"bar\",\n", " title=\"Total volume purchased by Location type\",\n", " show_figure=False,\n", - ") \n", + ")\n", "\n", - "#Combine Table and Scatterplot via grid layout:\n", + "# Combine Table and Scatterplot via grid layout:\n", "pandas_bokeh.plot_grid([[data_table, p_bar]], plot_width=300, plot_height=350)" ] }, @@ -3111,25 +3264,41 @@ } ], "source": [ - "x = Counter({\n", - " 'Origin country': 19345, 'Origin supplier': 39110, 'Unknown': 17955\n", - "})\n", + "x = Counter({\"Origin country\": 19345, \"Origin supplier\": 39110, \"Unknown\": 17955})\n", "\n", - "data = pd.DataFrame.from_dict(dict(x), orient='index').reset_index().rename(index=str, columns={0:'value', 'index':'Location'})\n", - "data['angle'] = data['value']/sum(x.values()) * 2*pi\n", - "data['color'] = BuGn[len(x)]\n", + "data = (\n", + " pd.DataFrame.from_dict(dict(x), orient=\"index\")\n", + " .reset_index()\n", + " .rename(index=str, columns={0: \"value\", \"index\": \"Location\"})\n", + ")\n", + "data[\"angle\"] = data[\"value\"] / sum(x.values()) * 2 * pi\n", + "data[\"color\"] = BuGn[len(x)]\n", "\n", "# Plotting code\n", "\n", - "p = figure(plot_height=350, title=\"Purchased volume by location type (tonnes)\", toolbar_location=None,\n", - " tools=\"hover\", tooltips=[(\"Location\", \"@Location\"),(\"Value\", \"@value\")])\n", + "p = figure(\n", + " plot_height=350,\n", + " title=\"Purchased volume by location type (tonnes)\",\n", + " toolbar_location=None,\n", + " tools=\"hover\",\n", + " tooltips=[(\"Location\", \"@Location\"), (\"Value\", \"@value\")],\n", + ")\n", "\n", - "p.annular_wedge(x=0, y=1, inner_radius=0.2, outer_radius=0.4,\n", - " start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),\n", - " line_color=\"white\", fill_color='color', legend='Location', source=data)\n", + "p.annular_wedge(\n", + " x=0,\n", + " y=1,\n", + " inner_radius=0.2,\n", + " outer_radius=0.4,\n", + " start_angle=cumsum(\"angle\", include_zero=True),\n", + " end_angle=cumsum(\"angle\"),\n", + " line_color=\"white\",\n", + " fill_color=\"color\",\n", + " legend=\"Location\",\n", + " source=data,\n", + ")\n", "\n", - "p.axis.axis_label=None\n", - "p.axis.visible=False\n", + "p.axis.axis_label = None\n", + 
"p.axis.visible = False\n", "p.grid.grid_line_color = None\n", "\n", "show(p)" @@ -3312,9 +3481,24 @@ } ], "source": [ - "water_risk_impact = mock_data[['Material', 'Volume', 'Country', 'Location t', 'Accuracy', 'wr_mean', 'wr_median', 'wr_std',\n", - " 'wr_max', 'wr_min', 'geometry',\n", - " 'wr_imp', 'wr_imp_min', 'wr_imp_max']]\n", + "water_risk_impact = mock_data[\n", + " [\n", + " \"Material\",\n", + " \"Volume\",\n", + " \"Country\",\n", + " \"Location t\",\n", + " \"Accuracy\",\n", + " \"wr_mean\",\n", + " \"wr_median\",\n", + " \"wr_std\",\n", + " \"wr_max\",\n", + " \"wr_min\",\n", + " \"geometry\",\n", + " \"wr_imp\",\n", + " \"wr_imp_min\",\n", + " \"wr_imp_max\",\n", + " ]\n", + "]\n", "water_risk_impact.head()" ] }, @@ -3429,9 +3613,9 @@ } ], "source": [ - "#water risk by material\n", - "water_risk_impact_material = water_risk_impact.groupby('Material').sum()\n", - "water_risk_impact_material\n" + "# water risk by material\n", + "water_risk_impact_material = water_risk_impact.groupby(\"Material\").sum()\n", + "water_risk_impact_material" ] }, { @@ -3545,7 +3729,7 @@ } ], "source": [ - "water_risk_material = water_risk_impact_material.sort_values('wr_mean', ascending=True)\n", + "water_risk_material = water_risk_impact_material.sort_values(\"wr_mean\", ascending=True)\n", "water_risk_material" ] }, @@ -3660,7 +3844,7 @@ } ], "source": [ - "water_impact_material = water_risk_impact_material.sort_values('wr_imp', ascending=True)\n", + "water_impact_material = water_risk_impact_material.sort_values(\"wr_imp\", ascending=True)\n", "water_impact_material" ] }, @@ -3762,93 +3946,142 @@ } ], "source": [ + "# water risk as donnut chart\n", + "x = Counter({\"Cotton\": 1559.834051, \"Leather\": 767231.370025, \"Rubber\": 1783.558232})\n", "\n", + "data = (\n", + " pd.DataFrame.from_dict(dict(x), orient=\"index\")\n", + " .reset_index()\n", + " .rename(index=str, columns={0: \"value\", \"index\": \"Commodity\"})\n", + ")\n", + "data[\"angle\"] = data[\"value\"] / sum(x.values()) * 2 * pi\n", + "data[\"color\"] = BuGn[len(x)]\n", "\n", - "#water risk as donnut chart\n", - "x = Counter({\n", - " 'Cotton': 1559.834051, 'Leather': 767231.370025, 'Rubber': 1783.558232\n", - "})\n", - "\n", - "data = pd.DataFrame.from_dict(dict(x), orient='index').reset_index().rename(index=str, columns={0:'value', 'index':'Commodity'})\n", - "data['angle'] = data['value']/sum(x.values()) * 2*pi\n", - "data['color'] = BuGn[len(x)]\n", - "\n", - "p_d_risk = figure(plot_height=350, title=\"Unsustainable water use risk (m3/tonne) by commodity in 2000\", toolbar_location=None,\n", - " tools=\"hover\", tooltips=[(\"Commodity\", \"@Commodity\"),(\"Value\", \"@value\")])\n", + "p_d_risk = figure(\n", + " plot_height=350,\n", + " title=\"Unsustainable water use risk (m3/tonne) by commodity in 2000\",\n", + " toolbar_location=None,\n", + " tools=\"hover\",\n", + " tooltips=[(\"Commodity\", \"@Commodity\"), (\"Value\", \"@value\")],\n", + ")\n", "\n", - "p_d_risk.annular_wedge(x=0, y=1, inner_radius=0.2, outer_radius=0.4,\n", - " start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),\n", - " line_color=\"white\", fill_color='color', legend='Commodity', source=data)\n", + "p_d_risk.annular_wedge(\n", + " x=0,\n", + " y=1,\n", + " inner_radius=0.2,\n", + " outer_radius=0.4,\n", + " start_angle=cumsum(\"angle\", include_zero=True),\n", + " end_angle=cumsum(\"angle\"),\n", + " line_color=\"white\",\n", + " fill_color=\"color\",\n", + " legend=\"Commodity\",\n", + " source=data,\n", + ")\n", "\n", - 
"p_d_risk.axis.axis_label=None\n", - "p_d_risk.axis.visible=False\n", + "p_d_risk.axis.axis_label = None\n", + "p_d_risk.axis.visible = False\n", "p_d_risk.grid.grid_line_color = None\n", "\n", - "#water risk as bar chart\n", + "# water risk as bar chart\n", "\n", "material = list(water_risk_material.index)\n", - "risk = list(water_risk_material['wr_mean'])\n", + "risk = list(water_risk_material[\"wr_mean\"])\n", "\n", "\n", - "data = {'material': material,\n", - " 'risk': risk}\n", + "data = {\"material\": material, \"risk\": risk}\n", "\n", "source = ColumnDataSource(data)\n", "\n", - "p_b_risk = figure(y_range=material, x_range=(0, 2067231.370025), plot_width=250, title=\"Top commodities by water risk (m3/Tonnes)\",\n", - " toolbar_location=None, tools=\"\")\n", + "p_b_risk = figure(\n", + " y_range=material,\n", + " x_range=(0, 2067231.370025),\n", + " plot_width=250,\n", + " title=\"Top commodities by water risk (m3/Tonnes)\",\n", + " toolbar_location=None,\n", + " tools=\"\",\n", + ")\n", "\n", - "p_b_risk.hbar(y=dodge('material', 0, range=p.y_range), right='risk', height=0.2, source=source,\n", - " color=\"#c9d9d3\")\n", + "p_b_risk.hbar(\n", + " y=dodge(\"material\", 0, range=p.y_range),\n", + " right=\"risk\",\n", + " height=0.2,\n", + " source=source,\n", + " color=\"#c9d9d3\",\n", + ")\n", "\n", "p_b_risk.y_range.range_padding = 0.1\n", "p_b_risk.ygrid.grid_line_color = None\n", "\n", "\n", + "# water impact as donut chart\n", + "x = Counter({\"Cotton\": 3.655594e06, \"Leather\": 7.877214e08, \"Rubber\": 2.190081e06})\n", "\n", - "#water impact as donut chart\n", - "x = Counter({\n", - " 'Cotton': 3.655594e+06, 'Leather': 7.877214e+08, 'Rubber': 2.190081e+06\n", - "})\n", - "\n", - "data = pd.DataFrame.from_dict(dict(x), orient='index').reset_index().rename(index=str, columns={0:'value', 'index':'Commodity'})\n", - "data['angle'] = data['value']/sum(x.values()) * 2*pi\n", - "data['color'] = BuGn[len(x)]\n", + "data = (\n", + " pd.DataFrame.from_dict(dict(x), orient=\"index\")\n", + " .reset_index()\n", + " .rename(index=str, columns={0: \"value\", \"index\": \"Commodity\"})\n", + ")\n", + "data[\"angle\"] = data[\"value\"] / sum(x.values()) * 2 * pi\n", + "data[\"color\"] = BuGn[len(x)]\n", "\n", - "p_d_imp = figure(plot_height=350, title=\"Unsustainable water use impact (m3) by commodity in 2000\", toolbar_location=None,\n", - " tools=\"hover\", tooltips=[(\"Commodity\", \"@Commodity\"),(\"Value\", \"@value\")])\n", + "p_d_imp = figure(\n", + " plot_height=350,\n", + " title=\"Unsustainable water use impact (m3) by commodity in 2000\",\n", + " toolbar_location=None,\n", + " tools=\"hover\",\n", + " tooltips=[(\"Commodity\", \"@Commodity\"), (\"Value\", \"@value\")],\n", + ")\n", "\n", - "p_d_imp.annular_wedge(x=0, y=1, inner_radius=0.2, outer_radius=0.4,\n", - " start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),\n", - " line_color=\"white\", fill_color='color', legend='Commodity', source=data)\n", + "p_d_imp.annular_wedge(\n", + " x=0,\n", + " y=1,\n", + " inner_radius=0.2,\n", + " outer_radius=0.4,\n", + " start_angle=cumsum(\"angle\", include_zero=True),\n", + " end_angle=cumsum(\"angle\"),\n", + " line_color=\"white\",\n", + " fill_color=\"color\",\n", + " legend=\"Commodity\",\n", + " source=data,\n", + ")\n", "\n", - "p_d_imp.axis.axis_label=None\n", - "p_d_imp.axis.visible=False\n", + "p_d_imp.axis.axis_label = None\n", + "p_d_imp.axis.visible = False\n", "p_d_imp.grid.grid_line_color = None\n", "\n", - "#water impact as bar chart\n", + "# water 
impact as bar chart\n", "\n", "material = list(water_impact_material.index)\n", - "impact = list(water_impact_material['wr_mean'])\n", + "impact = list(water_impact_material[\"wr_mean\"])\n", "\n", "\n", - "data = {'material': material,\n", - " 'impact': impact}\n", + "data = {\"material\": material, \"impact\": impact}\n", "\n", "source = ColumnDataSource(data)\n", "\n", - "p_b_impact = figure(y_range=material, x_range=(0, 2067231.370025), plot_width=250, title=\"Top commodities by water impact (m3)\",\n", - " toolbar_location=None, tools=\"\")\n", + "p_b_impact = figure(\n", + " y_range=material,\n", + " x_range=(0, 2067231.370025),\n", + " plot_width=250,\n", + " title=\"Top commodities by water impact (m3)\",\n", + " toolbar_location=None,\n", + " tools=\"\",\n", + ")\n", "\n", - "p_b_impact.hbar(y=dodge('material', 0, range=p.y_range), right='impact', height=0.2, source=source,\n", - " color=\"#c9d9d3\")\n", + "p_b_impact.hbar(\n", + " y=dodge(\"material\", 0, range=p.y_range),\n", + " right=\"impact\",\n", + " height=0.2,\n", + " source=source,\n", + " color=\"#c9d9d3\",\n", + ")\n", "\n", "p_b_impact.y_range.range_padding = 0.1\n", "p_b_impact.ygrid.grid_line_color = None\n", "\n", "\n", - "#Make Dashboard with Grid Layout:\n", - "pandas_bokeh.plot_grid([[p_d_risk, p_d_imp],[p_b_risk,p_b_impact]], plot_width=450)" + "# Make Dashboard with Grid Layout:\n", + "pandas_bokeh.plot_grid([[p_d_risk, p_d_imp], [p_b_risk, p_b_impact]], plot_width=450)" ] }, { @@ -4058,8 +4291,8 @@ } ], "source": [ - "#risk and impact over time\n", - "pct_change_df = pd.read_csv('../../datasets/raw/crop_data/projection_factor_byCountry.csv')\n", + "# risk and impact over time\n", + "pct_change_df = pd.read_csv(\"../../datasets/raw/crop_data/projection_factor_byCountry.csv\")\n", "pct_change_df.head()" ] }, @@ -4157,14 +4390,13 @@ } ], "source": [ - "\n", "mean_pct = pd.DataFrame(pct_change_df.mean())[1:]\n", "mean_pct = mean_pct.transpose()\n", - "mean_pct['2000']=0\n", - "mean_pct['2001']=0\n", - "mean_pct['2007']=0\n", - "mean_pct['2008']=0\n", - "mean_pct['2012']=0\n", + "mean_pct[\"2000\"] = 0\n", + "mean_pct[\"2001\"] = 0\n", + "mean_pct[\"2007\"] = 0\n", + "mean_pct[\"2008\"] = 0\n", + "mean_pct[\"2012\"] = 0\n", "mean_pct" ] }, @@ -4258,32 +4490,33 @@ "source": [ "pct_change_json = {}\n", "for el in mean_pct.columns:\n", - " pct_change_json[el]=mean_pct[el].iloc[0]\n", - " \n", - "#estimate total impact to project\n", + " pct_change_json[el] = mean_pct[el].iloc[0]\n", + "\n", + "# estimate total impact to project\n", "total_risk_impact = water_risk_impact.sum()\n", "\n", "##RISK OVER TIME\n", - "#project total risk\n", - "average_risk = total_risk_impact['wr_mean']\n", - "pr_average_risk = [(average_risk + pct_change_json[f'{year}']*average_risk) for year in range(2000,2020)]\n", + "# project total risk\n", + "average_risk = total_risk_impact[\"wr_mean\"]\n", + "pr_average_risk = [\n", + " (average_risk + pct_change_json[f\"{year}\"] * average_risk) for year in range(2000, 2020)\n", + "]\n", "\n", - "#project max risk\n", - "max_risk = total_risk_impact['wr_max']\n", - "pr_max_risk = [(max_risk + pct_change_json[f'{year}']*max_risk) for year in range(2000,2020)]\n", + "# project max risk\n", + "max_risk = total_risk_impact[\"wr_max\"]\n", + "pr_max_risk = [(max_risk + pct_change_json[f\"{year}\"] * max_risk) for year in range(2000, 2020)]\n", "\n", - "#project min risk\n", - "min_risk = total_risk_impact['wr_min']\n", - "pr_min_risk = [(min_risk + pct_change_json[f'{year}']*min_risk) for year in 
range(2000,2020)]\n", + "# project min risk\n", + "min_risk = total_risk_impact[\"wr_min\"]\n", + "pr_min_risk = [(min_risk + pct_change_json[f\"{year}\"] * min_risk) for year in range(2000, 2020)]\n", "\n", - "#generate dataframe\n", + "# generate dataframe\n", "df_risk = pd.DataFrame()\n", - "df_risk['year']=[year for year in range(2000,2020)]\n", - "df_risk['average_risk']=pr_average_risk\n", - "df_risk['min_risk']=pr_min_risk\n", - "df_risk['max_risk']=pr_max_risk\n", - "df_risk.head()\n", - "\n" + "df_risk[\"year\"] = [year for year in range(2000, 2020)]\n", + "df_risk[\"average_risk\"] = pr_average_risk\n", + "df_risk[\"min_risk\"] = pr_min_risk\n", + "df_risk[\"max_risk\"] = pr_max_risk\n", + "df_risk.head()" ] }, { @@ -4303,18 +4536,22 @@ } ], "source": [ - "df_risk['year'] = pd.to_datetime(df_risk['year'], format='%Y')\n", + "df_risk[\"year\"] = pd.to_datetime(df_risk[\"year\"], format=\"%Y\")\n", "\n", "source = ColumnDataSource(df_risk)\n", "\n", "p_risk = figure(x_axis_type=\"datetime\")\n", "\n", - "p_risk.line(x='year', y='average_risk', line_width=2, source=source, legend='Average impact')\n", - "p_risk.line(x='year', y='min_risk', line_width=2, source=source, color=Spectral10[5], legend='Min impact')\n", - "p_risk.line(x='year', y='max_risk', line_width=2, source=source, color=Spectral10[9], legend='Max impact')\n", + "p_risk.line(x=\"year\", y=\"average_risk\", line_width=2, source=source, legend=\"Average impact\")\n", + "p_risk.line(\n", + " x=\"year\", y=\"min_risk\", line_width=2, source=source, color=Spectral10[5], legend=\"Min impact\"\n", + ")\n", + "p_risk.line(\n", + " x=\"year\", y=\"max_risk\", line_width=2, source=source, color=Spectral10[9], legend=\"Max impact\"\n", + ")\n", "\n", - "p_risk.title.text = 'Unsustainable water use risk over time'\n", - "p_risk.yaxis.axis_label = 'm3 / ha'" + "p_risk.title.text = \"Unsustainable water use risk over time\"\n", + "p_risk.yaxis.axis_label = \"m3 / ha\"" ] }, { @@ -4406,25 +4643,27 @@ ], "source": [ "##IMPACT OVER TIME\n", - "#project total risk\n", - "average_imp = total_risk_impact['wr_imp']\n", - "pr_average_imp = [(average_risk + pct_change_json[f'{year}']*average_risk) for year in range(2000,2020)]\n", + "# project total risk\n", + "average_imp = total_risk_impact[\"wr_imp\"]\n", + "pr_average_imp = [\n", + " (average_risk + pct_change_json[f\"{year}\"] * average_risk) for year in range(2000, 2020)\n", + "]\n", "\n", - "#project max risk\n", - "max_risk = total_risk_impact['wr_imp_max']\n", - "pr_max_imp = [(max_risk + pct_change_json[f'{year}']*max_risk) for year in range(2000,2020)]\n", + "# project max risk\n", + "max_risk = total_risk_impact[\"wr_imp_max\"]\n", + "pr_max_imp = [(max_risk + pct_change_json[f\"{year}\"] * max_risk) for year in range(2000, 2020)]\n", "\n", - "#project min risk\n", - "max_risk = total_risk_impact['wr_imp_max']\n", - "pr_min_imp = [(min_risk + pct_change_json[f'{year}']*min_risk) for year in range(2000,2020)]\n", + "# project min risk\n", + "max_risk = total_risk_impact[\"wr_imp_max\"]\n", + "pr_min_imp = [(min_risk + pct_change_json[f\"{year}\"] * min_risk) for year in range(2000, 2020)]\n", "\n", "\n", - "#generate dataframe\n", + "# generate dataframe\n", "df_imp = pd.DataFrame()\n", - "df_imp['year']=[year for year in range(2000,2020)]\n", - "df_imp['average_imp']=pr_average_imp\n", - "df_imp['min_imp']=pr_min_imp\n", - "df_imp['max_imp']=pr_max_imp\n", + "df_imp[\"year\"] = [year for year in range(2000, 2020)]\n", + "df_imp[\"average_imp\"] = pr_average_imp\n", + 
"df_imp[\"min_imp\"] = pr_min_imp\n", + "df_imp[\"max_imp\"] = pr_max_imp\n", "df_imp.head()" ] }, @@ -4445,18 +4684,22 @@ } ], "source": [ - "df_imp['year'] = pd.to_datetime(df_imp['year'], format='%Y')\n", + "df_imp[\"year\"] = pd.to_datetime(df_imp[\"year\"], format=\"%Y\")\n", "\n", "source = ColumnDataSource(df_imp)\n", "\n", "p_imp = figure(x_axis_type=\"datetime\")\n", "\n", - "p_imp.line(x='year', y='average_imp', line_width=2, source=source, legend='Average impact')\n", - "p_imp.line(x='year', y='min_imp', line_width=2, source=source, color=Spectral10[5], legend='Min impact')\n", - "p_imp.line(x='year', y='max_imp', line_width=2, source=source, color=Spectral10[9], legend='Max impact')\n", + "p_imp.line(x=\"year\", y=\"average_imp\", line_width=2, source=source, legend=\"Average impact\")\n", + "p_imp.line(\n", + " x=\"year\", y=\"min_imp\", line_width=2, source=source, color=Spectral10[5], legend=\"Min impact\"\n", + ")\n", + "p_imp.line(\n", + " x=\"year\", y=\"max_imp\", line_width=2, source=source, color=Spectral10[9], legend=\"Max impact\"\n", + ")\n", "\n", - "p_imp.title.text = 'Unsustainable water use impact over time'\n", - "p_imp.yaxis.axis_label = 'm3'" + "p_imp.title.text = \"Unsustainable water use impact over time\"\n", + "p_imp.yaxis.axis_label = \"m3\"" ] }, { @@ -4549,7 +4792,7 @@ } ], "source": [ - "#Make Dashboard with Grid Layout:\n", + "# Make Dashboard with Grid Layout:\n", "pandas_bokeh.plot_grid([[p_risk, p_imp]], plot_width=450)" ] }, @@ -4894,7 +5137,7 @@ } ], "source": [ - "water_risk_impact[['Country','wr_mean','wr_imp']][4:]" + "water_risk_impact[[\"Country\", \"wr_mean\", \"wr_imp\"]][4:]" ] }, { @@ -4959,10 +5202,16 @@ } ], "source": [ - "p = figure(title = \"\")\n", - "p.circle('wr_mean','wr_imp',source=water_risk_impact[['wr_mean','wr_imp']][4:],fill_alpha=0.2, size=10)\n", - "p.xaxis.axis_label = 'Risk'\n", - "p.yaxis.axis_label = 'Impact'\n", + "p = figure(title=\"\")\n", + "p.circle(\n", + " \"wr_mean\",\n", + " \"wr_imp\",\n", + " source=water_risk_impact[[\"wr_mean\", \"wr_imp\"]][4:],\n", + " fill_alpha=0.2,\n", + " size=10,\n", + ")\n", + "p.xaxis.axis_label = \"Risk\"\n", + "p.yaxis.axis_label = \"Impact\"\n", "show(p)" ] }, diff --git a/data/notebooks/Lab/7_Tables_&_charts_prototype_v1.ipynb b/data/notebooks/Lab/7_Tables_&_charts_prototype_v1.ipynb index 1c83cbe37..58886c16a 100644 --- a/data/notebooks/Lab/7_Tables_&_charts_prototype_v1.ipynb +++ b/data/notebooks/Lab/7_Tables_&_charts_prototype_v1.ipynb @@ -130,7 +130,7 @@ " env_key, _val = line.split(\"=\", 1)\n", " env_value = _val.split(\"\\n\")[0]\n", " env[env_key] = env_value\n", - " \n", + "\n", "list(env.keys())" ] }, @@ -141,12 +141,14 @@ "metadata": {}, "outputs": [], "source": [ - "postgres_thread_pool = ThreadedConnectionPool(1, 50,\n", - " host=env['API_POSTGRES_HOST'],\n", - " port=env['API_POSTGRES_PORT'],\n", - " user=env['API_POSTGRES_USERNAME'],\n", - " password=env['API_POSTGRES_PASSWORD']\n", - " )" + "postgres_thread_pool = ThreadedConnectionPool(\n", + " 1,\n", + " 50,\n", + " host=env[\"API_POSTGRES_HOST\"],\n", + " port=env[\"API_POSTGRES_PORT\"],\n", + " user=env[\"API_POSTGRES_USERNAME\"],\n", + " password=env[\"API_POSTGRES_PASSWORD\"],\n", + ")" ] }, { @@ -180,15 +182,26 @@ "# EXAMPLE OF FILTERS THAT THE CLIENT CAN SEND:\n", "\n", "# array of indicators - required\n", - "indicators = ('0594aba7-70a5-460c-9b58-fc1802d264ea', '633cf928-7c4f-41a3-99c5-e8c1bda0b323', 'c71eb531-2c8e-40d2-ae49-1049543be4d1', 'e2c00251-fe31-4330-8c38-604535d795dc') # ids of indicators, 
required\n", - "#group by key - Required - material, business-unit, region, supplier\n", - "groupBy='material' \n", - "start_year= 2019#required\n", - "end_year= 2022 #required\n", + "indicators = (\n", + "    \"0594aba7-70a5-460c-9b58-fc1802d264ea\",\n", + "    \"633cf928-7c4f-41a3-99c5-e8c1bda0b323\",\n", + "    \"c71eb531-2c8e-40d2-ae49-1049543be4d1\",\n", + "    \"e2c00251-fe31-4330-8c38-604535d795dc\",\n", + ")  # ids of indicators, required\n", + "# group by key - Required - material, business-unit, region, supplier\n", + "groupBy = \"material\"\n", + "start_year = 2019  # required\n", + "end_year = 2022  # required\n", "# OPTIONAL FIELDS\n", - "materials= ('41822942-3957-4526-9dc5-a80d94419a1e', '80d52237-bb4a-4f25-9133-cbbebaa68734') #optional - if no provided we don't filter by material\n", - "origins=('05bd7ca9-6687-4df2-a46d-31e12f0f01bf', 'fd4b4fc0-6640-47e6-ba45-f65dd34072c5') #optioal - if not provided we don't filter by origin\n", - "#suppliers=[1, 2, 3] # optional - if not provided, we don't filter by supplier\n", + "materials = (\n", + "    \"41822942-3957-4526-9dc5-a80d94419a1e\",\n", + "    \"80d52237-bb4a-4f25-9133-cbbebaa68734\",\n", + ")  # optional - if not provided we don't filter by material\n", + "origins = (\n", + "    \"05bd7ca9-6687-4df2-a46d-31e12f0f01bf\",\n", + "    \"fd4b4fc0-6640-47e6-ba45-f65dd34072c5\",\n", + ")  # optional - if not provided we don't filter by origin\n", + "# suppliers=[1, 2, 3] # optional - if not provided, we don't filter by supplier\n", "\n", "\n", "# connect to the ddbb\n", @@ -196,9 +209,10 @@ "cursor = conn.cursor()\n", "\n", "\n", - "## NOTE: The same logic for the and indicators, materials and admin regions would be applied to the supplier. \n", + "## NOTE: The same logic for the indicators, materials and admin regions would be applied to the supplier.\n", "# As all the data is null, I'm not filtering by anything in this case\n", - "cursor.execute(f\"\"\"\n", + "cursor.execute(\n", + "    f\"\"\"\n", "    select sr.\"year\", sum(sr.tonnage) tonnes, sum(ir.value) impact, i.id, i.\"shortName\", m.\"name\" \n", "    from sourcing_records sr --select sourcing recods\n", "    left join sourcing_location sl on sl.id =sr.\"sourcingLocationId\" --join sourcing locations for filtering data\n", @@ -212,7 +226,8 @@ "    -- and sl.\"t1SupplierId\" in (list) if filter selected\n", "    -- and sl.\"producerId\" in (list) if filter selected\n", "    group by sl.\"materialId\", sr.\"year\", i.id, i.\"shortName\", m.\"name\" -- group by value to get the sum of impacts\n", - "\"\"\")\n", + "\"\"\"\n", + ")\n", "\n", "response = cursor.fetchall()" ] @@ -451,80 +466,87 @@ ], "source": [ "data = []\n", - "default_agr = 1.5 #default annual growth rate - used for projecting the data. 
\n", - "#iterate the response over the different indicators that the client has provided\n", + "default_agr = 1.5 # default annual growth rate - used for projecting the data.\n", + "# iterate the response over the different indicators that the client has provided\n", "for idx, indicatorId in enumerate(indicators):\n", " ##append data by indicator\n", - " data.append({\n", - " 'indicatorShortName':[el[4] for el in response if el[3]==indicatorId][0], # set the indicator shortname that we get from the query above\n", - " 'indicatorId':indicatorId,# set the indicator id\n", - " 'groupBy':groupBy, #set the group by key\n", - " 'rows':[], # we will append later the data by year and by group by value\n", - " 'yearSum':[] # we will append later the sum of total impact by yera and by indicator\n", - " })\n", - " #populate rows\n", - " respose_byIndicator = [el for el in response if el[3]==indicatorId] # filter the response by the indicatorId\n", - " unique_names = set([el[5] for el in response if el[3]==indicatorId]) #get unique names for idicator id\n", + " data.append(\n", + " {\n", + " \"indicatorShortName\": [el[4] for el in response if el[3] == indicatorId][\n", + " 0\n", + " ], # set the indicator shortname that we get from the query above\n", + " \"indicatorId\": indicatorId, # set the indicator id\n", + " \"groupBy\": groupBy, # set the group by key\n", + " \"rows\": [], # we will append later the data by year and by group by value\n", + " \"yearSum\": [], # we will append later the sum of total impact by yera and by indicator\n", + " }\n", + " )\n", + " # populate rows\n", + " respose_byIndicator = [\n", + " el for el in response if el[3] == indicatorId\n", + " ] # filter the response by the indicatorId\n", + " unique_names = set(\n", + " [el[5] for el in response if el[3] == indicatorId]\n", + " ) # get unique names for idicator id\n", " for name in unique_names:\n", - " data[idx]['rows'].append({\n", - " 'name':name, #set name of the individual names of the groupby key\n", - " 'values':[] #append values by year\n", - " })\n", - " \n", - " i = len(data[idx]['rows'])-1 #index for appending later the data\n", - " for year in range(start_year, end_year+1): # iterate over each year from start and end year\n", - " value = [el[2] for el in respose_byIndicator if el[0]==year and el[5]==name] # get the value of impact for those years we have record on the ddbb\n", - " if len(value): # if we have data, we append the value\n", - " data[idx]['rows'][i]['values'].append({\n", - " 'year':year,\n", - " 'value':value[0],\n", - " 'isProjected':False\n", - " })\n", + " data[idx][\"rows\"].append(\n", + " {\n", + " \"name\": name, # set name of the individual names of the groupby key\n", + " \"values\": [], # append values by year\n", + " }\n", + " )\n", + "\n", + " i = len(data[idx][\"rows\"]) - 1 # index for appending later the data\n", + " for year in range(\n", + " start_year, end_year + 1\n", + " ): # iterate over each year from start and end year\n", + " value = [\n", + " el[2] for el in respose_byIndicator if el[0] == year and el[5] == name\n", + " ] # get the value of impact for those years we have record on the ddbb\n", + " if len(value): # if we have data, we append the value\n", + " data[idx][\"rows\"][i][\"values\"].append(\n", + " {\"year\": year, \"value\": value[0], \"isProjected\": False}\n", + " )\n", " value = value[0]\n", - " else: # if we don't have data, we project\n", + " else: # if we don't have data, we project\n", " # we get the latest value to project with the default annual 
growth rate\n", - " value_to_project = data[idx]['rows'][i]['values'][-1]['value']\n", - " value = value_to_project + value_to_project*default_agr/100 \n", - " data[idx]['rows'][i]['values'].append({\n", - " 'year':year,\n", - " 'value':value,\n", - " 'isProjected':True\n", - " })\n", - " \n", + " value_to_project = data[idx][\"rows\"][i][\"values\"][-1][\"value\"]\n", + " value = value_to_project + value_to_project * default_agr / 100\n", + " data[idx][\"rows\"][i][\"values\"].append(\n", + " {\"year\": year, \"value\": value, \"isProjected\": True}\n", + " )\n", + "\n", " # append the total sum of impact by year by indicator\n", - " for i,year in enumerate(range(start_year, end_year+1)):\n", + " for i, year in enumerate(range(start_year, end_year + 1)):\n", " ## add sum of impact by indicator\n", - " if len(data[idx]['rows']):\n", - " data[idx]['yearSum'].append({\n", - " 'year':year,\n", - " 'value':sum([el['values'][i]['value'] for el in data[idx]['rows'] if el['values'][i]['year']==year])\n", - " })\n", - "# ONCE WE HAVE ALL THE DATA BY INDICATOR, THE CLIENT WILL ALSO NEED THE TOTAL PURCHASED VOLUME BY YEAR \n", + " if len(data[idx][\"rows\"]):\n", + " data[idx][\"yearSum\"].append(\n", + " {\n", + " \"year\": year,\n", + " \"value\": sum(\n", + " [\n", + " el[\"values\"][i][\"value\"]\n", + " for el in data[idx][\"rows\"]\n", + " if el[\"values\"][i][\"year\"] == year\n", + " ]\n", + " ),\n", + " }\n", + " )\n", + "# ONCE WE HAVE ALL THE DATA BY INDICATOR, THE CLIENT WILL ALSO NEED THE TOTAL PURCHASED VOLUME BY YEAR\n", "# add total sum of purchase tonnes\n", - "data.append({\n", - " 'name':'purchaseTonnes',\n", - " 'values':[]\n", - "})\n", + "data.append({\"name\": \"purchaseTonnes\", \"values\": []})\n", "\n", - "for year in range(start_year, end_year+1):\n", - " purchase_tonnes = sum([el[2] for el in response if el[0]==year])\n", - " if purchase_tonnes!=0:\n", - " data[-1]['values'].append({\n", - " 'year':year,\n", - " 'value':purchase_tonnes,\n", - " 'isProjected': False\n", - " })\n", + "for year in range(start_year, end_year + 1):\n", + " purchase_tonnes = sum([el[2] for el in response if el[0] == year])\n", + " if purchase_tonnes != 0:\n", + " data[-1][\"values\"].append({\"year\": year, \"value\": purchase_tonnes, \"isProjected\": False})\n", " else:\n", - " tonnes_to_project=data[-1]['values'][-1]['value']\n", - " purchase_tonnes = tonnes_to_project + tonnes_to_project*default_agr/100 \n", - " data[-1]['values'].append({\n", - " 'year':year,\n", - " 'value':purchase_tonnes,\n", - " 'isProjected': True\n", - " })\n", - " \n", - " \n", - "data " + " tonnes_to_project = data[-1][\"values\"][-1][\"value\"]\n", + " purchase_tonnes = tonnes_to_project + tonnes_to_project * default_agr / 100\n", + " data[-1][\"values\"].append({\"year\": year, \"value\": purchase_tonnes, \"isProjected\": True})\n", + "\n", + "\n", + "data" ] } ], diff --git a/data/notebooks/Lab/8_1_scenario_prototype_create_scenario.ipynb b/data/notebooks/Lab/8_1_scenario_prototype_create_scenario.ipynb index 567f0822d..ff0bb0223 100644 --- a/data/notebooks/Lab/8_1_scenario_prototype_create_scenario.ipynb +++ b/data/notebooks/Lab/8_1_scenario_prototype_create_scenario.ipynb @@ -38,10 +38,9 @@ "outputs": [], "source": [ "# import libraries\n", - "from psycopg2.pool import ThreadedConnectionPool\n", - "\n", + "import pandas as pd\n", "from IPython.display import Image\n", - "import pandas as pd\n" + "from psycopg2.pool import ThreadedConnectionPool" ] }, { @@ -63,111 +62,119 @@ " }\n", " agr: annual growth 
rate.\n", " groupby: same group by as the unse used on the api query\n", - " \n", + "\n", " \"\"\"\n", " data = []\n", " for idx, indicatorId in enumerate(indicators):\n", " ##append data by indicator\n", - " data.append({\n", - " 'indicatorShortName':[el[4] for el in response if el[3]==indicatorId][0], # set the indicator shortname that we get from the query above\n", - " 'indicatorId':indicatorId,# set the indicator id\n", - " 'groupBy':groupBy, #set the group by key\n", - " 'rows':[], # we will append later the data by year and by group by value\n", - " 'yearSum':[] # we will append later the sum of total impact by yera and by indicator\n", - " })\n", - " #populate rows\n", - " respose_byIndicator = [el for el in response if el[3]==indicatorId] # filter the response by the indicatorId\n", - " unique_names = set([el[5] for el in response if el[3]==indicatorId]) #get unique names for idicator id\n", + " data.append(\n", + " {\n", + " \"indicatorShortName\": [el[4] for el in response if el[3] == indicatorId][\n", + " 0\n", + " ], # set the indicator shortname that we get from the query above\n", + " \"indicatorId\": indicatorId, # set the indicator id\n", + " \"groupBy\": groupBy, # set the group by key\n", + " \"rows\": [], # we will append later the data by year and by group by value\n", + " \"yearSum\": [], # we will append later the sum of total impact by yera and by indicator\n", + " }\n", + " )\n", + " # populate rows\n", + " respose_byIndicator = [\n", + " el for el in response if el[3] == indicatorId\n", + " ] # filter the response by the indicatorId\n", + " unique_names = set(\n", + " [el[5] for el in response if el[3] == indicatorId]\n", + " ) # get unique names for idicator id\n", " for name in unique_names:\n", - " data[idx]['rows'].append({\n", - " 'name':name, #set name of the individual names of the groupby key\n", - " 'values':[] #append values by year\n", - " })\n", - " i = len(data[idx]['rows'])-1 #index for appending later the data\n", - " for year in range(start_year, end_year+1): # iterate over each year from start and end year\n", - " value = [el[2] for el in respose_byIndicator if el[0]==year and el[5]==name] # get the value of impact for those years we have record on the ddbb\n", - " if len(value): # if we have data, we append the value\n", - " data[idx]['rows'][i]['values'].append({\n", - " 'year':year,\n", - " 'value':value[0],\n", - " 'isProjected':False\n", - " })\n", + " data[idx][\"rows\"].append(\n", + " {\n", + " \"name\": name, # set name of the individual names of the groupby key\n", + " \"values\": [], # append values by year\n", + " }\n", + " )\n", + " i = len(data[idx][\"rows\"]) - 1 # index for appending later the data\n", + " for year in range(\n", + " start_year, end_year + 1\n", + " ): # iterate over each year from start and end year\n", + " value = [\n", + " el[2] for el in respose_byIndicator if el[0] == year and el[5] == name\n", + " ] # get the value of impact for those years we have record on the ddbb\n", + " if len(value): # if we have data, we append the value\n", + " data[idx][\"rows\"][i][\"values\"].append(\n", + " {\"year\": year, \"value\": value[0], \"isProjected\": False}\n", + " )\n", " value = value[0]\n", - " else: # if we don't have data, we project\n", + " else: # if we don't have data, we project\n", " # we get the latest value to project with the default annual growth rate\n", - " value_to_project = data[idx]['rows'][i]['values'][-1]['value']\n", - " value = value_to_project + value_to_project*default_agr/100 \n", - " 
data[idx]['rows'][i]['values'].append({\n", - " 'year':year,\n", - " 'value':value,\n", - " 'isProjected':True\n", - " })\n", + " value_to_project = data[idx][\"rows\"][i][\"values\"][-1][\"value\"]\n", + " value = value_to_project + value_to_project * default_agr / 100\n", + " data[idx][\"rows\"][i][\"values\"].append(\n", + " {\"year\": year, \"value\": value, \"isProjected\": True}\n", + " )\n", " # append the total sum of impact by year by indicator\n", - " for i,year in enumerate(range(start_year, end_year+1)):\n", + " for i, year in enumerate(range(start_year, end_year + 1)):\n", " ## add sum of impact by indicator\n", - " if len(data[idx]['rows']):\n", - " data[idx]['yearSum'].append({\n", - " 'year':year,\n", - " 'value':sum([el['values'][i]['value'] for el in data[idx]['rows'] if el['values'][i]['year']==year])\n", - " })\n", - " # ONCE WE HAVE ALL THE DATA BY INDICATOR, THE CLIENT WILL ALSO NEED THE TOTAL PURCHASED VOLUME BY YEAR \n", + " if len(data[idx][\"rows\"]):\n", + " data[idx][\"yearSum\"].append(\n", + " {\n", + " \"year\": year,\n", + " \"value\": sum(\n", + " [\n", + " el[\"values\"][i][\"value\"]\n", + " for el in data[idx][\"rows\"]\n", + " if el[\"values\"][i][\"year\"] == year\n", + " ]\n", + " ),\n", + " }\n", + " )\n", + " # ONCE WE HAVE ALL THE DATA BY INDICATOR, THE CLIENT WILL ALSO NEED THE TOTAL PURCHASED VOLUME BY YEAR\n", " # add total sum of purchase tonnes\n", - " data.append({\n", - " 'name':'purchaseTonnes',\n", - " 'values':[]\n", - " })\n", - "\n", - " for year in range(start_year, end_year+1):\n", - " purchase_tonnes = sum([el[2] for el in response if el[0]==year])\n", - " if purchase_tonnes!=0:\n", - " data[-1]['values'].append({\n", - " 'year':year,\n", - " 'value':purchase_tonnes,\n", - " 'isProjected': False\n", - " })\n", + " data.append({\"name\": \"purchaseTonnes\", \"values\": []})\n", + "\n", + " for year in range(start_year, end_year + 1):\n", + " purchase_tonnes = sum([el[2] for el in response if el[0] == year])\n", + " if purchase_tonnes != 0:\n", + " data[-1][\"values\"].append(\n", + " {\"year\": year, \"value\": purchase_tonnes, \"isProjected\": False}\n", + " )\n", " else:\n", - " tonnes_to_project=data[-1]['values'][-1]['value']\n", - " purchase_tonnes = tonnes_to_project + tonnes_to_project*default_agr/100 \n", - " data[-1]['values'].append({\n", - " 'year':year,\n", - " 'value':purchase_tonnes,\n", - " 'isProjected': True\n", - " })\n", - " \n", + " tonnes_to_project = data[-1][\"values\"][-1][\"value\"]\n", + " purchase_tonnes = tonnes_to_project + tonnes_to_project * default_agr / 100\n", + " data[-1][\"values\"].append({\"year\": year, \"value\": purchase_tonnes, \"isProjected\": True})\n", + "\n", " return data\n", "\n", "\n", "## workflow:\n", "\n", - " ## 1. get georegion for new location - geolocate using geolocation estrategy. Output saved on georegion \n", - " ## 2. get risk for material and each indicator\n", - " ## 3. get average risk for each indicator in georegion\n", - " ## 4. calculate impact by multiplying volume time average risk in new location:\n", + "## 1. get georegion for new location - geolocate using geolocation estrategy. Output saved on georegion\n", + "## 2. get risk for material and each indicator\n", + "## 3. get average risk for each indicator in georegion\n", + "## 4. 
calculate impact by multiplying volume time average risk in new location:\n", + "\n", "\n", "def get_LG_base_estimates(material_id, georegion_id):\n", - " \n", " LG_base_estimates = {\n", - " 'c71eb531-2c8e-40d2-ae49-1049543be4d1': 0,#carbon_emissions_tCO2e_t\n", - " '633cf928-7c4f-41a3-99c5-e8c1bda0b323':0, #deforestation_ha_t\n", - " '0594aba7-70a5-460c-9b58-fc1802d264ea': 0, #biodiversity_impact_PDF_t\n", - " 'e2c00251-fe31-4330-8c38-604535d795dc':0 #water_use_m3_t\n", + " \"c71eb531-2c8e-40d2-ae49-1049543be4d1\": 0, # carbon_emissions_tCO2e_t\n", + " \"633cf928-7c4f-41a3-99c5-e8c1bda0b323\": 0, # deforestation_ha_t\n", + " \"0594aba7-70a5-460c-9b58-fc1802d264ea\": 0, # biodiversity_impact_PDF_t\n", + " \"e2c00251-fe31-4330-8c38-604535d795dc\": 0, # water_use_m3_t\n", " }\n", - " #get production tables for materials\n", + " # get production tables for materials\n", " sql_prod_tables = f\"\"\"select hd.\"h3tableName\", hd.\"h3columnName\" from h3_data hd where hd.id in (\n", " select mth.\"h3DataId\" from material_to_h3 mth where mth.\"materialId\" = '{material_id}' and mth.\"type\" ='producer')\"\"\"\n", " cursor.execute(sql_prod_tables)\n", " response_prodtables = cursor.fetchall()\n", - " prod_table= response_prodtables[0][0]\n", - " prod_column = response_prodtables[0][1]\n", + " prod_table = response_prodtables[0][0]\n", + " prod_column = response_prodtables[0][1]\n", "\n", - " ## get harvest tables \n", + " ## get harvest tables\n", " sql_ha_tables = f\"\"\"select hd.\"h3tableName\", hd.\"h3columnName\" from h3_data hd where hd.id in (\n", " select mth.\"h3DataId\" from material_to_h3 mth where mth.\"materialId\" = '{material_id}' and mth.\"type\" ='producer')\"\"\"\n", " cursor.execute(sql_ha_tables)\n", " response_hatables = cursor.fetchall()\n", - " ha_table= response_hatables[0][0]\n", - " ha_column = response_hatables[0][1]\n", - "\n", + " ha_table = response_hatables[0][0]\n", + " ha_column = response_hatables[0][1]\n", "\n", " ## water indicator factor ####\n", " # Water risk (m3 / tones * year) = water footprint (m3/year)/ Total Production (tons)\n", @@ -181,11 +188,11 @@ "\n", " cursor.execute(sql_wr)\n", " response_avg_wr = cursor.fetchall()\n", - " LG_base_estimates['e2c00251-fe31-4330-8c38-604535d795dc'] = response_avg_wr[0]\n", + " LG_base_estimates[\"e2c00251-fe31-4330-8c38-604535d795dc\"] = response_avg_wr[0]\n", "\n", " ## deforestation indicator factor ####\n", " # Deforestation risk (ha/tones * year) = deforestation risk (unitless) * Harvested area (ha)/ Total Production (tons)\n", - " sql_dr =f\"\"\"select avg(risk.drisk) from (\n", + " sql_dr = f\"\"\"select avg(risk.drisk) from (\n", " select prodtable.h3index, (indtable.\"hansenLoss2019\" * haTable.\"{ha_column}\") / sum(prodtable.\"{prod_column}\") over() drisk from {prod_table} prodtable \n", " left join h3_grid_deforestation_global indTable on indTable.h3index = prodTable.h3index \n", " left join {ha_table} haTable on haTable.h3index=prodtable.h3index \n", @@ -196,10 +203,10 @@ " select h3_uncompact(gr.\"h3Compact\"::h3index[],6) from geo_region gr where gr.id='{georegion_id}')\"\"\"\n", " cursor.execute(sql_dr)\n", " response_avg_dr = cursor.fetchall()\n", - " LG_base_estimates['633cf928-7c4f-41a3-99c5-e8c1bda0b323'] = response_avg_dr[0]\n", + " LG_base_estimates[\"633cf928-7c4f-41a3-99c5-e8c1bda0b323\"] = response_avg_dr[0]\n", "\n", " ## carbon indicator factor ####\n", - " # Carbon risk (tCO2eq / tones * year ) = carbon risk (tCO2eq / ha) * Deforestation risk (ha/tons) \n", + " # Carbon risk (tCO2eq / 
tones * year ) = carbon risk (tCO2eq / ha) * Deforestation risk (ha/tons)\n", "\n", " sql_cr = f\"\"\"select avg(risk.crisk) from (\n", " select prodtable.h3index, indTable.\"earthstat2000GlobalHectareEmissions\" * ((deftable.\"hansenLoss2019\" * haTable.\"{ha_column}\") / sum(prodtable.\"{prod_column}\") over()) crisk from {prod_table} prodtable \n", @@ -214,10 +221,10 @@ " select h3_uncompact(gr.\"h3Compact\"::h3index[],6) from geo_region gr where gr.id='{georegion_id}') \"\"\"\n", " cursor.execute(sql_cr)\n", " response_avg_cr = cursor.fetchall()\n", - " LG_base_estimates['c71eb531-2c8e-40d2-ae49-1049543be4d1'] = response_avg_cr[0]\n", + " LG_base_estimates[\"c71eb531-2c8e-40d2-ae49-1049543be4d1\"] = response_avg_cr[0]\n", "\n", " ## biodiversity indicator factor ####\n", - " ## biodiversity risk (PDF / tons * year) = biodiversity risk ( PDF/year *m2)*(1/0.0001) unit conversion \n", + " ## biodiversity risk (PDF / tons * year) = biodiversity risk ( PDF/year *m2)*(1/0.0001) unit conversion\n", " sql_br = f\"\"\"select avg(risk.crisk) from (\n", " select prodtable.h3index, indTable.\"lciaPslRPermanentCrops\" * ((deftable.\"hansenLoss2019\" * haTable.\"{ha_column}\") / sum(prodtable.\"{prod_column}\") over()) crisk from {prod_table} prodtable \n", " left join h3_grid_deforestation_global defTable on defTable.h3index = prodTable.h3index \n", @@ -231,18 +238,16 @@ " select h3_uncompact(gr.\"h3Compact\"::h3index[],6) from geo_region gr where gr.id='{georegion_id}') \"\"\"\n", " cursor.execute(sql_br)\n", " response_avg_br = cursor.fetchall()\n", - " LG_base_estimates['0594aba7-70a5-460c-9b58-fc1802d264ea'] = response_avg_br[0]\n", - " \n", + " LG_base_estimates[\"0594aba7-70a5-460c-9b58-fc1802d264ea\"] = response_avg_br[0]\n", + "\n", " return LG_base_estimates\n", "\n", "\n", "def calculate_new_impact(estimates, response):\n", - " new_response = [(el[0],\n", - " int(el[1]),\n", - " int(el[1])* estimates[el[3]][0],\n", - " el[3],\n", - " el[4],\n", - " el[5]) for el in response]\n", + " new_response = [\n", + " (el[0], int(el[1]), int(el[1]) * estimates[el[3]][0], el[3], el[4], el[5])\n", + " for el in response\n", + " ]\n", " return new_response" ] }, @@ -270,7 +275,7 @@ } ], "source": [ - "#set env\n", + "# set env\n", "## env file for gcs upload\n", "env_path = \".env\"\n", "with open(env_path) as f:\n", @@ -279,7 +284,7 @@ " env_key, _val = line.split(\"=\", 1)\n", " env_value = _val.split(\"\\n\")[0]\n", " env[env_key] = env_value\n", - " \n", + "\n", "list(env.keys())" ] }, @@ -291,12 +296,14 @@ "outputs": [], "source": [ "# set conexion to local ddbb\n", - "postgres_thread_pool = ThreadedConnectionPool(1, 50,\n", - " host=env['API_POSTGRES_HOST'],\n", - " port=env['API_POSTGRES_PORT'],\n", - " user=env['API_POSTGRES_USERNAME'],\n", - " password=env['API_POSTGRES_PASSWORD']\n", - " )\n" + "postgres_thread_pool = ThreadedConnectionPool(\n", + " 1,\n", + " 50,\n", + " host=env[\"API_POSTGRES_HOST\"],\n", + " port=env[\"API_POSTGRES_PORT\"],\n", + " user=env[\"API_POSTGRES_USERNAME\"],\n", + " password=env[\"API_POSTGRES_PASSWORD\"],\n", + ")" ] }, { @@ -334,7 +341,7 @@ } ], "source": [ - "Image(filename = \"../../datasets/raw/images/scenario_creation.png\", width = 900, height = 300)" + "Image(filename=\"../../datasets/raw/images/scenario_creation.png\", width=900, height=300)" ] }, { @@ -359,20 +366,21 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "#### PARAMS SELECTED BY THE USER#########\n", "volume_perc = 50\n", - "material = ['0d7b1be5-dc86-47b8-ba3a-25190a275011'] 
#rubber\n", + "material = [\"0d7b1be5-dc86-47b8-ba3a-25190a275011\"] # rubber\n", "business_unit = \"all\"\n", - "suppliers = 'all'\n", - "sourcing_regions= ('7625bf66-dd45-44b8-a15d-8733338660ba')#indonesia\n", - "end_year=2022\n", - "group_by ='material'\n", - "start_year = 2019 # note: which start year?\n", - "indicators = ('0594aba7-70a5-460c-9b58-fc1802d264ea', '633cf928-7c4f-41a3-99c5-e8c1bda0b323', 'c71eb531-2c8e-40d2-ae49-1049543be4d1', 'e2c00251-fe31-4330-8c38-604535d795dc') #all indicators\n", - "\n", - "\n", - "\n" + "suppliers = \"all\"\n", + "sourcing_regions = \"7625bf66-dd45-44b8-a15d-8733338660ba\" # indonesia\n", + "end_year = 2022\n", + "group_by = \"material\"\n", + "start_year = 2019 # note: which start year?\n", + "indicators = (\n", + " \"0594aba7-70a5-460c-9b58-fc1802d264ea\",\n", + " \"633cf928-7c4f-41a3-99c5-e8c1bda0b323\",\n", + " \"c71eb531-2c8e-40d2-ae49-1049543be4d1\",\n", + " \"e2c00251-fe31-4330-8c38-604535d795dc\",\n", + ") # all indicators" ] }, { @@ -391,7 +399,7 @@ } ], "source": [ - "#RETRIEVE DATA\n", + "# RETRIEVE DATA\n", "\n", "sql = f\"\"\"SELECT sr.\"year\", sum(sr.tonnage) tonnes, sum(ir.value) impact, i.id, i.\"shortName\", m.\"name\" FROM sourcing_records sr \n", " LEFT JOIN sourcing_location sl ON sl.id=sr.\"sourcingLocationId\" \n", @@ -418,14 +426,14 @@ " AND i.id in {indicators}\n", " GROUP BY sl.\"materialId\", sr.\"year\", i.id, i.\"shortName\", m.\"name\" -- add the group by element selected by the client\"\"\"\n", "\n", - "#print(sql)\n", + "# print(sql)\n", "conn = postgres_thread_pool.getconn()\n", "cursor = conn.cursor()\n", "\n", - "print('Requestng data..')\n", + "print(\"Requestng data..\")\n", "cursor.execute(sql)\n", "response = cursor.fetchall()\n", - "print('Done!')" + "print(\"Done!\")" ] }, { @@ -436,13 +444,10 @@ "outputs": [], "source": [ "# distribute volume:\n", - "vol_per =(volume_perc/len(material))/100\n", - "clean_response = [(el[0],\n", - " int(el[1])*vol_per,\n", - " int(el[2])*vol_per,\n", - " el[3],\n", - " el[4],\n", - " el[5]) for el in response]\n" + "vol_per = (volume_perc / len(material)) / 100\n", + "clean_response = [\n", + " (el[0], int(el[1]) * vol_per, int(el[2]) * vol_per, el[3], el[4], el[5]) for el in response\n", + "]" ] }, { @@ -552,8 +557,19 @@ } ], "source": [ - "df_ = pd.DataFrame(clean_response, columns={'year','tonnes', 'impact', 'indicatorId', 'indicator','groupby'})\n", - "df_ = df_.rename(columns ={'indicator':'year', 'indicatorId':'tonnes', 'groupby':'impact', 'year':'indicatorId','impact': 'indicator', 'tonnes':'groupby' })\n", + "df_ = pd.DataFrame(\n", + " clean_response, columns={\"year\", \"tonnes\", \"impact\", \"indicatorId\", \"indicator\", \"groupby\"}\n", + ")\n", + "df_ = df_.rename(\n", + " columns={\n", + " \"indicator\": \"year\",\n", + " \"indicatorId\": \"tonnes\",\n", + " \"groupby\": \"impact\",\n", + " \"year\": \"indicatorId\",\n", + " \"impact\": \"indicator\",\n", + " \"tonnes\": \"groupby\",\n", + " }\n", + ")\n", "df_.head()" ] }, @@ -606,7 +622,7 @@ } ], "source": [ - "Image(filename = \"../../datasets/raw/images/set_growth_rate.png\", width = 900, height = 300)" + "Image(filename=\"../../datasets/raw/images/set_growth_rate.png\", width=900, height=300)" ] }, { @@ -617,8 +633,8 @@ "outputs": [], "source": [ "# the growth rate can be set for all business units or for one of the business units ingested by the user\n", - "bu_gr = 'all'\n", - "growth_rate = 1.5 # expectations of how purchase of raw material will change into the future" + "bu_gr = \"all\"\n", + 
"growth_rate = 1.5 # expectations of how purchase of raw material will change into the future" ] }, { @@ -628,8 +644,10 @@ "metadata": {}, "outputs": [], "source": [ - "#parse the data to the structure required by the client\n", - "parsed_data = project_data(response=clean_response, default_agr=1.5, groupBy='material',start_year=2019, end_year=2022)" + "# parse the data to the structure required by the client\n", + "parsed_data = project_data(\n", + " response=clean_response, default_agr=1.5, groupBy=\"material\", start_year=2019, end_year=2022\n", + ")" ] }, { @@ -713,9 +731,9 @@ ], "source": [ "# exacmple for one location\n", - "print('indicator',parsed_data[0]['indicatorShortName'])\n", - "print('material', parsed_data[0]['rows'][0]['name'])\n", - "pd.DataFrame(parsed_data[0]['rows'][0]['values']).transpose()" + "print(\"indicator\", parsed_data[0][\"indicatorShortName\"])\n", + "print(\"material\", parsed_data[0][\"rows\"][0][\"name\"])\n", + "pd.DataFrame(parsed_data[0][\"rows\"][0][\"values\"]).transpose()" ] }, { @@ -776,7 +794,7 @@ } ], "source": [ - "Image(filename = \"../../datasets/raw/images/change_supplier.png\", width = 900, height = 300)" + "Image(filename=\"../../datasets/raw/images/change_supplier.png\", width=900, height=300)" ] }, { @@ -817,20 +835,18 @@ "metadata": {}, "outputs": [], "source": [ - "#SELECT NEW PARAMS FOR INTERVENTION\n", - "new_supplier = None #optional\n", - "new_producer = None #optional\n", + "# SELECT NEW PARAMS FOR INTERVENTION\n", + "new_supplier = None # optional\n", + "new_producer = None # optional\n", "\n", - "new_location_type = 'country of production'\n", - "new_country = 'ESP'\n", - "city = 'Valencia'\n", + "new_location_type = \"country of production\"\n", + "new_country = \"ESP\"\n", + "city = \"Valencia\"\n", "## in this case the georegioId is 7f7316fb-db05-4215-835b-68569923aead\n", "\n", - "#select LG location base estimates\n", + "# select LG location base estimates\n", "\n", - "select_LG_baseEstimates = True\n", - " \n", - "\n" + "select_LG_baseEstimates = True" ] }, { @@ -854,8 +870,8 @@ } ], "source": [ - "georegion_id = '7f7316fb-db05-4215-835b-68569923aead'\n", - "material_id = '0d7b1be5-dc86-47b8-ba3a-25190a275011'\n", + "georegion_id = \"7f7316fb-db05-4215-835b-68569923aead\"\n", + "material_id = \"0d7b1be5-dc86-47b8-ba3a-25190a275011\"\n", "\n", "estimates = get_LG_base_estimates(material_id, georegion_id)\n", "estimates" @@ -872,18 +888,23 @@ "# distribute volume:\n", "\n", "user_estimates = {\n", - " 'c71eb531-2c8e-40d2-ae49-1049543be4d1': (1,),\n", - " '633cf928-7c4f-41a3-99c5-e8c1bda0b323': (20,),\n", - " '0594aba7-70a5-460c-9b58-fc1802d264ea': (5,),\n", - " 'e2c00251-fe31-4330-8c38-604535d795dc': (70,)}\n", + " \"c71eb531-2c8e-40d2-ae49-1049543be4d1\": (1,),\n", + " \"633cf928-7c4f-41a3-99c5-e8c1bda0b323\": (20,),\n", + " \"0594aba7-70a5-460c-9b58-fc1802d264ea\": (5,),\n", + " \"e2c00251-fe31-4330-8c38-604535d795dc\": (70,),\n", + "}\n", "\n", - "#calculate impacts\n", + "# calculate impacts\n", "new_response = calculate_new_impact(estimates, clean_response)\n", "new_response_user = calculate_new_impact(user_estimates, clean_response)\n", "\n", - "#project impacts\n", - "new_parsed_data = project_data(response=new_response, default_agr=1.5, groupBy='material',start_year=2019, end_year=2022)\n", - "new_parsed_data_user = project_data(response=new_response_user, default_agr=1.5, groupBy='material',start_year=2019, end_year=2022)\n" + "# project impacts\n", + "new_parsed_data = project_data(\n", + " response=new_response, 
default_agr=1.5, groupBy=\"material\", start_year=2019, end_year=2022\n", + ")\n", + "new_parsed_data_user = project_data(\n", + " response=new_response_user, default_agr=1.5, groupBy=\"material\", start_year=2019, end_year=2022\n", + ")" ] }, { @@ -967,9 +988,9 @@ ], "source": [ "# exacmple for one location\n", - "print('indicator',parsed_data[0]['indicatorShortName'])\n", - "print('material', parsed_data[0]['rows'][0]['name'])\n", - "pd.DataFrame(parsed_data[0]['rows'][0]['values']).transpose()" + "print(\"indicator\", parsed_data[0][\"indicatorShortName\"])\n", + "print(\"material\", parsed_data[0][\"rows\"][0][\"name\"])\n", + "pd.DataFrame(parsed_data[0][\"rows\"][0][\"values\"]).transpose()" ] }, { @@ -1053,9 +1074,9 @@ ], "source": [ "# exacmple for one location\n", - "print('indicator',new_parsed_data[0]['indicatorShortName'])\n", - "print('material', new_parsed_data[0]['rows'][0]['name'])\n", - "pd.DataFrame(new_parsed_data[0]['rows'][0]['values']).transpose()" + "print(\"indicator\", new_parsed_data[0][\"indicatorShortName\"])\n", + "print(\"material\", new_parsed_data[0][\"rows\"][0][\"name\"])\n", + "pd.DataFrame(new_parsed_data[0][\"rows\"][0][\"values\"]).transpose()" ] }, { @@ -1139,9 +1160,9 @@ ], "source": [ "# exacmple for one location\n", - "print('indicator',new_parsed_data_user[0]['indicatorShortName'])\n", - "print('material', new_parsed_data_user[0]['rows'][0]['name'])\n", - "pd.DataFrame(new_parsed_data_user[0]['rows'][0]['values']).transpose()" + "print(\"indicator\", new_parsed_data_user[0][\"indicatorShortName\"])\n", + "print(\"material\", new_parsed_data_user[0][\"rows\"][0][\"name\"])\n", + "pd.DataFrame(new_parsed_data_user[0][\"rows\"][0][\"values\"]).transpose()" ] }, { @@ -1183,7 +1204,7 @@ } ], "source": [ - "Image(filename = \"../../datasets/raw/images/Change_raw_materials.png\", width = 900, height = 300)" + "Image(filename=\"../../datasets/raw/images/Change_raw_materials.png\", width=900, height=300)" ] }, { @@ -1207,12 +1228,12 @@ } ], "source": [ - "# e.g. change cotton in a supplier from india \n", - "new_material = '80d52237-bb4a-4f25-9133-cbbebaa68734'\n", - "georegion_id = '309d7e9d-0c3b-47b3-8176-406d2ebbf61e'\n", + "# e.g. 
change cotton in a supplier from india\n", + "new_material = \"80d52237-bb4a-4f25-9133-cbbebaa68734\"\n", + "georegion_id = \"309d7e9d-0c3b-47b3-8176-406d2ebbf61e\"\n", "\n", "estimates = get_LG_base_estimates(new_material, georegion_id)\n", - "estimates\n" + "estimates" ] }, { @@ -1296,16 +1317,18 @@ ], "source": [ "# calculate and project impacts:\n", - "#calculate impacts\n", + "# calculate impacts\n", "new_response = calculate_new_impact(estimates, clean_response)\n", "\n", - "#project impacts\n", - "new_parsed_data = project_data(response=new_response, default_agr=1.5, groupBy='material',start_year=2019, end_year=2022)\n", + "# project impacts\n", + "new_parsed_data = project_data(\n", + " response=new_response, default_agr=1.5, groupBy=\"material\", start_year=2019, end_year=2022\n", + ")\n", "\n", "# exacmple for one location\n", - "print('indicator',parsed_data[0]['indicatorShortName'])\n", - "print('material', parsed_data[0]['rows'][0]['name'])\n", - "pd.DataFrame(parsed_data[0]['rows'][0]['values']).transpose()" + "print(\"indicator\", parsed_data[0][\"indicatorShortName\"])\n", + "print(\"material\", parsed_data[0][\"rows\"][0][\"name\"])\n", + "pd.DataFrame(parsed_data[0][\"rows\"][0][\"values\"]).transpose()" ] }, { @@ -1389,9 +1412,9 @@ ], "source": [ "# exacmple for one location\n", - "print('indicator',new_parsed_data[0]['indicatorShortName'])\n", - "print('material', new_parsed_data[0]['rows'][0]['name'])\n", - "pd.DataFrame(new_parsed_data[0]['rows'][0]['values']).transpose()" + "print(\"indicator\", new_parsed_data[0][\"indicatorShortName\"])\n", + "print(\"material\", new_parsed_data[0][\"rows\"][0][\"name\"])\n", + "pd.DataFrame(new_parsed_data[0][\"rows\"][0][\"values\"]).transpose()" ] }, { @@ -1430,7 +1453,7 @@ } ], "source": [ - "Image(filename = \"../../datasets/raw/images/change_prod_efficiency.png\", width = 900, height = 300)\n" + "Image(filename=\"../../datasets/raw/images/change_prod_efficiency.png\", width=900, height=300)" ] }, { @@ -1517,21 +1540,24 @@ "# distribute volume:\n", "\n", "user_estimates = {\n", - " 'c71eb531-2c8e-40d2-ae49-1049543be4d1': (1,),\n", - " '633cf928-7c4f-41a3-99c5-e8c1bda0b323': (20,),\n", - " '0594aba7-70a5-460c-9b58-fc1802d264ea': (5,),\n", - " 'e2c00251-fe31-4330-8c38-604535d795dc': (70,)}\n", + " \"c71eb531-2c8e-40d2-ae49-1049543be4d1\": (1,),\n", + " \"633cf928-7c4f-41a3-99c5-e8c1bda0b323\": (20,),\n", + " \"0594aba7-70a5-460c-9b58-fc1802d264ea\": (5,),\n", + " \"e2c00251-fe31-4330-8c38-604535d795dc\": (70,),\n", + "}\n", "\n", - "#calculate impacts\n", + "# calculate impacts\n", "new_response_user = calculate_new_impact(user_estimates, clean_response)\n", "\n", - "#project impacts\n", - "new_parsed_data_user = project_data(response=new_response_user, default_agr=1.5, groupBy='material',start_year=2019, end_year=2022)\n", + "# project impacts\n", + "new_parsed_data_user = project_data(\n", + " response=new_response_user, default_agr=1.5, groupBy=\"material\", start_year=2019, end_year=2022\n", + ")\n", "\n", "# exacmple for one location\n", - "print('indicator',parsed_data[0]['indicatorShortName'])\n", - "print('material', parsed_data[0]['rows'][0]['name'])\n", - "pd.DataFrame(parsed_data[0]['rows'][0]['values']).transpose()" + "print(\"indicator\", parsed_data[0][\"indicatorShortName\"])\n", + "print(\"material\", parsed_data[0][\"rows\"][0][\"name\"])\n", + "pd.DataFrame(parsed_data[0][\"rows\"][0][\"values\"]).transpose()" ] }, { @@ -1615,9 +1641,9 @@ ], "source": [ "# exacmple for one location\n", - 
"print('indicator',new_parsed_data_user[0]['indicatorShortName'])\n", - "print('material', new_parsed_data_user[0]['rows'][0]['name'])\n", - "pd.DataFrame(new_parsed_data_user[0]['rows'][0]['values']).transpose()" + "print(\"indicator\", new_parsed_data_user[0][\"indicatorShortName\"])\n", + "print(\"material\", new_parsed_data_user[0][\"rows\"][0][\"name\"])\n", + "pd.DataFrame(new_parsed_data_user[0][\"rows\"][0][\"values\"]).transpose()" ] }, { diff --git a/data/notebooks/Lab/8_QA_water_impact.ipynb b/data/notebooks/Lab/8_QA_water_impact.ipynb index e74a75ce2..f51951d3c 100644 --- a/data/notebooks/Lab/8_QA_water_impact.ipynb +++ b/data/notebooks/Lab/8_QA_water_impact.ipynb @@ -71,13 +71,14 @@ "outputs": [], "source": [ "import os\n", - "import xarray as xr\n", - "import rioxarray as rxr\n", + "\n", "import cartopy.crs as ccrs\n", "import geopandas as gpd\n", + "import matplotlib.colors as colors\n", "import matplotlib.pyplot as plt\n", - "from matplotlib.colors import ListedColormap\n", - "import matplotlib.colors as colors" + "import rioxarray as rxr\n", + "import xarray as xr\n", + "from matplotlib.colors import ListedColormap" ] }, { @@ -94,21 +95,21 @@ "metadata": {}, "outputs": [], "source": [ - "def da_plot(da, gdf, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40)):\n", + "def da_plot(da, gdf, color_list, values, title, x=\"x\", y=\"y\", xlim=(65, 100), ylim=(4, 40)):\n", " # Define the colors you want\n", " cmap = ListedColormap(color_list)\n", "\n", " # Define a normalization from values -> colors\n", " norm = colors.BoundaryNorm(values, len(color_list))\n", "\n", - " plt.figure(figsize=(12,10))\n", + " plt.figure(figsize=(12, 10))\n", " ax = plt.axes(projection=ccrs.PlateCarree())\n", "\n", " ax.set_global()\n", "\n", - " gdf.plot(ax=ax, color='red', alpha=0.1, edgecolor='red')\n", + " gdf.plot(ax=ax, color=\"red\", alpha=0.1, edgecolor=\"red\")\n", " da.plot(ax=ax, norm=norm, cmap=cmap, x=x, y=y, transform=ccrs.PlateCarree())\n", - " gdf.plot(ax=ax, color='w', alpha=0.1, edgecolor='red')\n", + " gdf.plot(ax=ax, color=\"w\", alpha=0.1, edgecolor=\"red\")\n", " ax.coastlines()\n", " ax.set_xlim(xlim)\n", " ax.set_ylim(ylim)\n", @@ -192,8 +193,8 @@ } ], "source": [ - "#import geometry:\n", - "geom = gpd.read_file('../../datasets/raw/water/QA/gadm36_IND_0.shp')\n", + "# import geometry:\n", + "geom = gpd.read_file(\"../../datasets/raw/water/QA/gadm36_IND_0.shp\")\n", "geom.head()" ] }, @@ -608,9 +609,9 @@ } ], "source": [ - "xda = rxr.open_rasterio('../../datasets/raw/water/QA/bl_wf_mmyr_area.tif').squeeze().drop(\"band\")\n", - "# convert to Dataset \n", - "xds_wf = xr.Dataset({'water_footprint': xda}, attrs=xda.attrs)\n", + "xda = rxr.open_rasterio(\"../../datasets/raw/water/QA/bl_wf_mmyr_area.tif\").squeeze().drop(\"band\")\n", + "# convert to Dataset\n", + "xds_wf = xr.Dataset({\"water_footprint\": xda}, attrs=xda.attrs)\n", "xds_wf" ] }, @@ -634,10 +635,20 @@ ], "source": [ "color_list = [\"#ffffff\", \"#73b3d8\", \"#2879b9\", \"#08306b\"]\n", - "values = [0, 29584100, 863202440, 10063202440, 105581714153]\n", - "title = 'Water footprint (mm/yr)'\n", + "values = [0, 29584100, 863202440, 10063202440, 105581714153]\n", + "title = \"Water footprint (mm/yr)\"\n", "\n", - "da_plot(xds_wf['water_footprint'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds_wf[\"water_footprint\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " 
ylim=(4, 40),\n", + ")" ] }, { @@ -658,7 +669,7 @@ "outputs": [], "source": [ "# Define path\n", - "path = '../../datasets/raw/spam2010v2r0_global_prod/'" + "path = \"../../datasets/raw/spam2010v2r0_global_prod/\"" ] }, { @@ -667,29 +678,31 @@ "metadata": {}, "outputs": [], "source": [ - "tech_dict = {'All': 'A', 'Irrigated': 'I', 'Rainfed': 'R'}\n", + "tech_dict = {\"All\": \"A\", \"Irrigated\": \"I\", \"Rainfed\": \"R\"}\n", "\n", "for i, tech in enumerate(tech_dict.keys()):\n", - " included_extensions = [f'{tech_dict[tech]}.tif']\n", - " file_names = [fn for fn in os.listdir(path) if any(fn.endswith(ext) for ext in included_extensions)]\n", + " included_extensions = [f\"{tech_dict[tech]}.tif\"]\n", + " file_names = [\n", + " fn for fn in os.listdir(path) if any(fn.endswith(ext) for ext in included_extensions)\n", + " ]\n", "\n", " for j, file in enumerate(file_names):\n", - " xda = rxr.open_rasterio(path+file).squeeze().drop(\"band\")\n", + " xda = rxr.open_rasterio(path + file).squeeze().drop(\"band\")\n", "\n", " # Remove negative values\n", " xda = xda.where(xda > 0)\n", "\n", " # Add crop coordinates\n", - " crop_name = file.split('_')[-2]\n", - " xda = xda.assign_coords({\"crop\": crop_name}).expand_dims(['crop']) \n", + " crop_name = file.split(\"_\")[-2]\n", + " xda = xda.assign_coords({\"crop\": crop_name}).expand_dims([\"crop\"])\n", "\n", - " # Convert to Dataset \n", + " # Convert to Dataset\n", " xds_tmp = xr.Dataset({tech: xda}, attrs=xda.attrs)\n", "\n", " if j == 0:\n", - " xds_tech = xds_tmp \n", + " xds_tech = xds_tmp\n", " else:\n", - " xds_tech = xr.concat([xds_tech, xds_tmp], dim='crop')\n", + " xds_tech = xr.concat([xds_tech, xds_tmp], dim=\"crop\")\n", "\n", " if i == 0:\n", " xds_crop = xds_tech\n", @@ -1246,9 +1259,19 @@ "source": [ "color_list = [\"#ffffff\", \"#d5e1df\", \"#e3eaa7\", \"#b5e7a0\", \"#86af49\"]\n", "values = [0, 1, 100, 1000, 10000, 20000]\n", - "title = 'All irrigated crop production (t)'\n", + "title = \"All irrigated crop production (t)\"\n", "\n", - "da_plot(xds_crop.sum('crop')['Irrigated'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds_crop.sum(\"crop\")[\"Irrigated\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -1660,13 +1683,13 @@ } ], "source": [ - "xda = rxr.open_rasterio(path+'spam2010V2r0_global_P_COTT_I.tif').squeeze().drop(\"band\")\n", + "xda = rxr.open_rasterio(path + \"spam2010V2r0_global_P_COTT_I.tif\").squeeze().drop(\"band\")\n", "\n", "# Remove negative values\n", "xda = xda.where(xda > 0)\n", "\n", - "# Convert to Dataset \n", - "xds_cp = xr.Dataset({'cotton_production': xda}, attrs=xda.attrs)\n", + "# Convert to Dataset\n", + "xds_cp = xr.Dataset({\"cotton_production\": xda}, attrs=xda.attrs)\n", "xds_cp" ] }, @@ -1691,9 +1714,19 @@ "source": [ "color_list = [\"#ffffff\", \"#d5e1df\", \"#e3eaa7\", \"#b5e7a0\", \"#86af49\"]\n", "values = [0, 1, 100, 1000, 5000, 10000]\n", - "title = 'Irrigated cotton production (t)'\n", + "title = \"Irrigated cotton production (t)\"\n", "\n", - "da_plot(xds_cp['cotton_production'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds_cp[\"cotton_production\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -1717,8 +1750,8 @@ } ], "source": [ - "tot_pro = 
xds_cp['cotton_production'].sum().data\n", - "print(f'Total production of cotton: {tot_pro} tonnes')" + "tot_pro = xds_cp[\"cotton_production\"].sum().data\n", + "print(f\"Total production of cotton: {tot_pro} tonnes\")" ] }, { @@ -2130,13 +2163,19 @@ } ], "source": [ - "xda = rxr.open_rasterio('../../datasets/raw/spam2010v2r0_global_harv_area/spam2010V2r0_global_H_COTT_I.tif').squeeze().drop(\"band\")\n", + "xda = (\n", + " rxr.open_rasterio(\n", + " \"../../datasets/raw/spam2010v2r0_global_harv_area/spam2010V2r0_global_H_COTT_I.tif\"\n", + " )\n", + " .squeeze()\n", + " .drop(\"band\")\n", + ")\n", "\n", "# Remove negative values\n", "xda = xda.where(xda > 0)\n", "\n", - "# Convert to Dataset \n", - "xds_cha = xr.Dataset({'cotton_harvested_area': xda}, attrs=xda.attrs)\n", + "# Convert to Dataset\n", + "xds_cha = xr.Dataset({\"cotton_harvested_area\": xda}, attrs=xda.attrs)\n", "xds_cha" ] }, @@ -2161,9 +2200,19 @@ "source": [ "color_list = [\"#ffffff\", \"#d5e1df\", \"#e3eaa7\", \"#b5e7a0\", \"#86af49\"]\n", "values = [0, 1, 10, 100, 1000, 7000]\n", - "title = 'Cotton harvested area (ha)'\n", + "title = \"Cotton harvested area (ha)\"\n", "\n", - "da_plot(xds_cha['cotton_harvested_area'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds_cha[\"cotton_harvested_area\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -2630,9 +2679,13 @@ } ], "source": [ - "xda = rxr.open_rasterio('../../datasets/raw/water/QA/bl_wf_mmyr_area_MAPSPAM_ext.tif').squeeze().drop(\"band\")\n", - "# Convert to Dataset \n", - "xds_wf = xr.Dataset({'water_footprint': xda}, attrs=xda.attrs)\n", + "xda = (\n", + " rxr.open_rasterio(\"../../datasets/raw/water/QA/bl_wf_mmyr_area_MAPSPAM_ext.tif\")\n", + " .squeeze()\n", + " .drop(\"band\")\n", + ")\n", + "# Convert to Dataset\n", + "xds_wf = xr.Dataset({\"water_footprint\": xda}, attrs=xda.attrs)\n", "\n", "# Assign MAPSPAM coords\n", "xds_wf = xds_wf.assign_coords(x=xds_cha.x.values)\n", @@ -2641,9 +2694,9 @@ "xds = xds_wf.copy()\n", "\n", "# Add all variables\n", - "xds['all_crop_production'] = xds_crop.sum('crop')['Irrigated']\n", - "xds['cotton_production'] = xds_cp['cotton_production']\n", - "xds['cotton_harvested_area'] = xds_cha['cotton_harvested_area']\n", + "xds[\"all_crop_production\"] = xds_crop.sum(\"crop\")[\"Irrigated\"]\n", + "xds[\"cotton_production\"] = xds_cp[\"cotton_production\"]\n", + "xds[\"cotton_harvested_area\"] = xds_cha[\"cotton_harvested_area\"]\n", "\n", "xds" ] @@ -3102,9 +3155,13 @@ ], "source": [ "xds_ind = xds.rio.clip(geom.geometry, geom.crs, drop=False, invert=False)\n", - "xds_ind[\"all_crop_production\"] = xds_ind[\"all_crop_production\"].where(xds_ind[\"all_crop_production\"] > 0)\n", - "xds_ind['cotton_production'] = xds_ind['cotton_production'].where(xds_ind['cotton_production'] > 0)\n", - "xds_ind['cotton_harvested_area'] = xds_ind['cotton_harvested_area'].where(xds_ind['cotton_harvested_area'] > 0)\n", + "xds_ind[\"all_crop_production\"] = xds_ind[\"all_crop_production\"].where(\n", + " xds_ind[\"all_crop_production\"] > 0\n", + ")\n", + "xds_ind[\"cotton_production\"] = xds_ind[\"cotton_production\"].where(xds_ind[\"cotton_production\"] > 0)\n", + "xds_ind[\"cotton_harvested_area\"] = xds_ind[\"cotton_harvested_area\"].where(\n", + " xds_ind[\"cotton_harvested_area\"] > 0\n", + ")\n", "xds_ind" ] }, @@ -3129,9 +3186,19 @@ "source": [ "color_list = [\"#ffffff\", 
\"#d5e1df\", \"#e3eaa7\", \"#b5e7a0\", \"#86af49\"]\n", "values = [0, 1, 10, 100, 1000, 7000]\n", - "title = 'Cotton harvested area (hectares)'\n", + "title = \"Cotton harvested area (hectares)\"\n", "\n", - "da_plot(xds_ind['cotton_harvested_area'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds_ind[\"cotton_harvested_area\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -3156,7 +3223,7 @@ ], "source": [ "tot_pro = xds_ind[\"cotton_production\"].sum().data\n", - "print(f'Total production of cotton in India: {tot_pro} tons')" + "print(f\"Total production of cotton in India: {tot_pro} tons\")" ] }, { @@ -3181,7 +3248,7 @@ ], "source": [ "tot_ha = xds_ind[\"cotton_harvested_area\"].sum().data\n", - "print(f'Total harvest area of cotton in India: {tot_ha} hectares')" + "print(f\"Total harvest area of cotton in India: {tot_ha} hectares\")" ] }, { @@ -3199,7 +3266,9 @@ "metadata": {}, "outputs": [], "source": [ - "xds_ind = xds_ind.assign(water_risk = (xds_ind[\"water_footprint\"]*mm_to_m3)/xds_ind[\"all_crop_production\"])" + "xds_ind = xds_ind.assign(\n", + " water_risk=(xds_ind[\"water_footprint\"] * mm_to_m3) / xds_ind[\"all_crop_production\"]\n", + ")" ] }, { @@ -3223,8 +3292,18 @@ "source": [ "color_list = [\"#ffffff\", \"#96ceb4\", \"#ffeead\", \"#ffcc5c\", \"#ff6f69\"]\n", "values = [0, 100, 500, 1000, 5000, 10000]\n", - "title = 'Water risk (m3/yr*t)'\n", - "da_plot(xds_ind['water_risk'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "title = \"Water risk (m3/yr*t)\"\n", + "da_plot(\n", + " xds_ind[\"water_risk\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -3252,8 +3331,8 @@ } ], "source": [ - "mean_risk = ((xds_ind[\"water_footprint\"]*mm_to_m3)/xds_ind[\"all_crop_production\"]).mean().data\n", - "print(f'Total water impact for cotton in India:: {mean_risk * volume} m3/yr')" + "mean_risk = ((xds_ind[\"water_footprint\"] * mm_to_m3) / xds_ind[\"all_crop_production\"]).mean().data\n", + "print(f\"Total water impact for cotton in India:: {mean_risk * volume} m3/yr\")" ] }, { @@ -3273,7 +3352,9 @@ "metadata": {}, "outputs": [], "source": [ - "xds_ind = xds_ind.assign(probability_purchase_area = (volume/tot_ha)* xds_ind[\"cotton_harvested_area\"])" + "xds_ind = xds_ind.assign(\n", + " probability_purchase_area=(volume / tot_ha) * xds_ind[\"cotton_harvested_area\"]\n", + ")" ] }, { @@ -3297,8 +3378,18 @@ "source": [ "color_list = [\"#ffffff\", \"#a2b9bc\", \"#878f99\", \"#b2ad7f\", \"#6b5b95\"]\n", "values = [0, 0.005, 0.01, 0.025, 0.05, 0.1]\n", - "title = 'Probability purchase area (t)'\n", - "da_plot(xds_ind['probability_purchase_area'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "title = \"Probability purchase area (t)\"\n", + "da_plot(\n", + " xds_ind[\"probability_purchase_area\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -3323,7 +3414,7 @@ ], "source": [ "tot_volume = xds_ind[\"probability_purchase_area\"].sum().data\n", - "print(f'Total distrivuted volume of cottom in India: {tot_volume} t')" + "print(f\"Total distrivuted volume of cottom in India: {tot_volume} t\")" ] }, { @@ -3341,7 +3432,10 @@ 
"metadata": {}, "outputs": [], "source": [ - "xds_ind = xds_ind.assign(water_impact = ((xds_ind[\"water_footprint\"]*mm_to_m3)/xds_ind[\"all_crop_production\"]) * xds_ind['probability_purchase_area'])" + "xds_ind = xds_ind.assign(\n", + " water_impact=((xds_ind[\"water_footprint\"] * mm_to_m3) / xds_ind[\"all_crop_production\"])\n", + " * xds_ind[\"probability_purchase_area\"]\n", + ")" ] }, { @@ -3366,9 +3460,19 @@ "color_list = [\"#ffffff\", \"#b2b2b2\", \"#f4e1d2\", \"#f18973\", \"#bc5a45\"]\n", "values = [0, 0.1, 4, 8, 32, 64]\n", "\n", - "title = 'Distribution of the water impact for 745 tonnes of cotton in India (m3/year)'\n", + "title = \"Distribution of the water impact for 745 tonnes of cotton in India (m3/year)\"\n", "\n", - "da_plot(xds_ind['water_impact'],geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds_ind[\"water_impact\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -3392,8 +3496,8 @@ } ], "source": [ - "tot_impact = xds_ind['water_impact'].sum().data\n", - "print(f'Total water impact for 745 tonnes of cotton in India:: {tot_impact} m3/yr')" + "tot_impact = xds_ind[\"water_impact\"].sum().data\n", + "print(f\"Total water impact for 745 tonnes of cotton in India:: {tot_impact} m3/yr\")" ] }, { @@ -3417,8 +3521,16 @@ } ], "source": [ - "tot_impact_all = (((xds_ind[\"water_footprint\"]*mm_to_m3)/xds_ind[\"all_crop_production\"]) * (xds_ind[\"cotton_harvested_area\"]/tot_ha) * tot_pro).sum().data\n", - "print(f'Total water impact for cotton in India:: {tot_impact_all} m3/yr')" + "tot_impact_all = (\n", + " (\n", + " ((xds_ind[\"water_footprint\"] * mm_to_m3) / xds_ind[\"all_crop_production\"])\n", + " * (xds_ind[\"cotton_harvested_area\"] / tot_ha)\n", + " * tot_pro\n", + " )\n", + " .sum()\n", + " .data\n", + ")\n", + "print(f\"Total water impact for cotton in India:: {tot_impact_all} m3/yr\")" ] }, { diff --git a/data/notebooks/Lab/8_Water_risk_test.ipynb b/data/notebooks/Lab/8_Water_risk_test.ipynb index 02a667c8c..04d29f2b3 100644 --- a/data/notebooks/Lab/8_Water_risk_test.ipynb +++ b/data/notebooks/Lab/8_Water_risk_test.ipynb @@ -24,19 +24,10 @@ "metadata": {}, "outputs": [], "source": [ - "import geopandas as gpd\n", + "import matplotlib.pyplot as plt\n", "import pandas as pd\n", - "from shapely.geometry import Point\n", "import rasterio as rio\n", - "import rasterio.plot\n", - "import matplotlib.pyplot as plt\n", - "from rasterio.plot import show_hist\n", - "import time\n", - "from rasterstats import gen_zonal_stats, gen_point_query, zonal_stats\n", - "from shapely.geometry import shape, mapping\n", - "import folium\n", - "import numpy as np\n", - "import rioxarray" + "import rasterio.plot" ] }, { @@ -75,8 +66,8 @@ } ], "source": [ - "blwf_soy_orig='../../datasets/raw/Water/Reports_Commodities/Soybean/wfbl_mmyr/hdr.adf'\n", - "blwf_soy_p='../../datasets/raw/Water/Reports_Commodities/Soybean/wfbl_soyb_epsg4326.tif'\n", + "blwf_soy_orig = \"../../datasets/raw/Water/Reports_Commodities/Soybean/wfbl_mmyr/hdr.adf\"\n", + "blwf_soy_p = \"../../datasets/raw/Water/Reports_Commodities/Soybean/wfbl_soyb_epsg4326.tif\"\n", "!rm -rf $blwf_soy_p\n", "!gdal_translate -a_srs EPSG:4326 -of GTiff $blwf_soy_orig $blwf_soy_p\n", "!gdal_calc.py -A $blwf_soy_p --outfile $blwf_soy_p --calc 'A*103' --overwrite --quiet" @@ -168,8 +159,10 @@ } ], "source": [ - 
"blwf_default_orig='../../datasets/raw/Water/Report50-WF-of-prodn-RasterFiles/wf_bltot_mmyr/hdr.adf'\n", - "blwf_default_p='../../datasets/raw/Water/Report50-WF-of-prodn-RasterFiles/wf_bltot_mmyr/wfbl_default_epsg4326.tif'\n", + "blwf_default_orig = (\n", + " \"../../datasets/raw/Water/Report50-WF-of-prodn-RasterFiles/wf_bltot_mmyr/hdr.adf\"\n", + ")\n", + "blwf_default_p = \"../../datasets/raw/Water/Report50-WF-of-prodn-RasterFiles/wf_bltot_mmyr/wfbl_default_epsg4326.tif\"\n", "\n", "!rm -rf $blwf_default_p\n", "!gdal_translate -a_srs EPSG:4326 -of GTiff $blwf_default_orig $blwf_default_p\n", @@ -260,13 +253,15 @@ } ], "source": [ - "prod_soy_orig='../../datasets/raw/spam2010v2r0_global_prod.geotiff/spam2010V2r0_global_P_SOYB_A.tif'\n", - "prod_soy_p='../../datasets/raw/spam2010v2r0_global_prod.geotiff/spam2010V2r0_global_P_SOYB_A_new_extent.tif'\n", + "prod_soy_orig = (\n", + " \"../../datasets/raw/spam2010v2r0_global_prod.geotiff/spam2010V2r0_global_P_SOYB_A.tif\"\n", + ")\n", + "prod_soy_p = \"../../datasets/raw/spam2010v2r0_global_prod.geotiff/spam2010V2r0_global_P_SOYB_A_new_extent.tif\"\n", "\n", "!rm -rf $prod_soy_p\n", "!gdal_translate -projwin -179.9916666 83.0883333 180.0083333 -55.9116667 -of GTiff $prod_soy_orig $prod_soy_p\n", "\n", - "#Remove pixels with codified NA value (-1)\n", + "# Remove pixels with codified NA value (-1)\n", "#!gdal_calc.py -A $prod_soy_p --outfile $prod_soy_p --calc 'A*(A>0)' --overwrite --quiet" ] }, @@ -347,7 +342,7 @@ "# for lack of a better method: production = width * height * pct of valid pixels * mean\n", "tot_prod = 4320 * 1668 * 0.1156 * 300.32094059949\n", "\n", - "print(f'Total production: {tot_prod}')" + "print(f\"Total production: {tot_prod}\")" ] }, { @@ -465,12 +460,12 @@ } ], "source": [ - "with rio.open('../../datasets/processed/water_indicators/water_risk_soy_especific.tif') as src:\n", + "with rio.open(\"../../datasets/processed/water_indicators/water_risk_soy_especific.tif\") as src:\n", " image_array = src.read(1)\n", - " #msk = src.read_masks()\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " rio.plot.show(image_array, vmin=1, cmap='Blues' , ax=ax)\n", - " ax.set_title('Soy especific water use')\n" + " # msk = src.read_masks()\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " rio.plot.show(image_array, vmin=1, cmap=\"Blues\", ax=ax)\n", + " ax.set_title(\"Soy especific water use\")" ] }, { @@ -571,12 +566,12 @@ } ], "source": [ - "with rio.open('../../datasets/processed/water_indicators/water_risk_soy_default.tif') as src:\n", + "with rio.open(\"../../datasets/processed/water_indicators/water_risk_soy_default.tif\") as src:\n", " image_array = src.read(1)\n", - " #msk = src.read_masks()\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " rio.plot.show(image_array, vmin=1, cmap='Blues' , ax=ax)\n", - " ax.set_title('Soy default water use')" + " # msk = src.read_masks()\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " rio.plot.show(image_array, vmin=1, cmap=\"Blues\", ax=ax)\n", + " ax.set_title(\"Soy default water use\")" ] }, { @@ -604,8 +599,8 @@ } ], "source": [ - "especific_map='../../datasets/processed/water_indicators/water_risk_soy_especific.tif'\n", - "default_map='../../datasets/processed/water_indicators/water_risk_soy_default.tif'\n", + "especific_map = \"../../datasets/processed/water_indicators/water_risk_soy_especific.tif\"\n", + "default_map = \"../../datasets/processed/water_indicators/water_risk_soy_default.tif\"\n", "\n", "!gdal_calc.py -A $especific_map -B $default_map 
--outfile='../../datasets/processed/water_indicators/water_risk_soy_DIFF_esp-def.tif' --calc=\"(A-B)/A\" --overwrite --quiet" ] @@ -689,12 +684,12 @@ } ], "source": [ - "with rio.open('../../datasets/processed/water_indicators/water_risk_soy_DIFF_esp-def.tif') as src:\n", + "with rio.open(\"../../datasets/processed/water_indicators/water_risk_soy_DIFF_esp-def.tif\") as src:\n", " image_array = src.read(1)\n", - " #msk = src.read_masks()\n", - " fig, ax = plt.subplots(figsize=[30,20])\n", - " rio.plot.show(image_array,vmin=-4, cmap='RdBu' , ax=ax)\n", - " ax.set_title('Soy water use differences (Especific - Default)')" + " # msk = src.read_masks()\n", + " fig, ax = plt.subplots(figsize=[30, 20])\n", + " rio.plot.show(image_array, vmin=-4, cmap=\"RdBu\", ax=ax)\n", + " ax.set_title(\"Soy water use differences (Especific - Default)\")" ] }, { @@ -720,10 +715,10 @@ "metadata": {}, "outputs": [], "source": [ - "especific_map='../../datasets/processed/water_indicators/water_risk_soy_especific.tif'\n", - "default_map='../../datasets/processed/water_indicators/water_risk_soy_default.tif'\n", - "prod_soy_p='../../datasets/raw/spam2010v2r0_global_prod.geotiff/spam2010V2r0_global_P_SOYB_A_new_extent.tif'\n", - "diff_map='../../datasets/processed/water_indicators/water_risk_soy_DIFF_esp-def.tif'\n", + "especific_map = \"../../datasets/processed/water_indicators/water_risk_soy_especific.tif\"\n", + "default_map = \"../../datasets/processed/water_indicators/water_risk_soy_default.tif\"\n", + "prod_soy_p = \"../../datasets/raw/spam2010v2r0_global_prod.geotiff/spam2010V2r0_global_P_SOYB_A_new_extent.tif\"\n", + "diff_map = \"../../datasets/processed/water_indicators/water_risk_soy_DIFF_esp-def.tif\"\n", "\n", "#!gdal2xyz.py -csv $diff_map '../../datasets/processed/water_indicators/differences_soy.csv'" ] @@ -858,9 +853,11 @@ } ], "source": [ - "diff=pd.read_csv('../../datasets/processed/water_indicators/differences_soy.xyz', sep=\" \", header=None)\n", - "diff.columns=[\"X\", \"Y\", \"difference\"]\n", - "#diff.dropna(inplace=True)\n", + "diff = pd.read_csv(\n", + " \"../../datasets/processed/water_indicators/differences_soy.xyz\", sep=\" \", header=None\n", + ")\n", + "diff.columns = [\"X\", \"Y\", \"difference\"]\n", + "# diff.dropna(inplace=True)\n", "diff.describe()" ] }, @@ -946,7 +943,7 @@ } ], "source": [ - "diff[(diff['difference'] > -10) & (diff['difference'] < 0)].head()" + "diff[(diff[\"difference\"] > -10) & (diff[\"difference\"] < 0)].head()" ] }, { @@ -1052,7 +1049,7 @@ } ], "source": [ - "diff[(diff['difference'] > -10)].describe()" + "diff[(diff[\"difference\"] > -10)].describe()" ] }, { @@ -1087,13 +1084,13 @@ } ], "source": [ - "print('Saving especific data')\n", + "print(\"Saving especific data\")\n", "!gdal_translate -projwin -126 49 -72 25 -of 'xyz' $especific_map '../../datasets/processed/water_indicators/risk_especific_soy.xyz'\n", - "print('Saving default data')\n", + "print(\"Saving default data\")\n", "!gdal_translate -projwin -126 49 -72 25 -of 'xyz' $default_map '../../datasets/processed/water_indicators/risk_default_soy.xyz'\n", - "print('Saving production data')\n", + "print(\"Saving production data\")\n", "!gdal_translate -projwin -126 49 -72 25 -of 'xyz' $prod_soy_p '../../datasets/processed/water_indicators/production_soy.xyz'\n", - "print('All done')" + "print(\"All done\")" ] }, { @@ -1103,17 +1100,21 @@ "metadata": {}, "outputs": [], "source": [ - "esp=pd.read_csv('../../datasets/processed/water_indicators/risk_especific_soy.xyz', sep=\" \", header=None)\n", - 
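The gdal_calc call above writes the per-pixel relative difference between the soy-specific and default water-use rasters as (A - B) / A. A minimal NumPy sketch of that formula on toy arrays; the notebook's file paths and GDAL pipeline are untouched, these values are only illustrative.

import numpy as np

specific = np.array([[4.0, 2.0], [0.0, 5.0]])   # soy-specific water use (toy values)
default = np.array([[3.0, 4.0], [1.0, 5.0]])    # default (all-crop) water use (toy values)

# (A - B) / A, leaving NaN where the specific layer is zero so we never divide by zero
with np.errstate(divide="ignore", invalid="ignore"):
    rel_diff = np.where(specific != 0, (specific - default) / specific, np.nan)

print(rel_diff)  # negative cells mean the default layer exceeds the specific one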
"esp.columns=[\"X\", \"Y\", \"esp_risk\"]\n", - "#esp.dropna(inplace=True)\n", + "esp = pd.read_csv(\n", + " \"../../datasets/processed/water_indicators/risk_especific_soy.xyz\", sep=\" \", header=None\n", + ")\n", + "esp.columns = [\"X\", \"Y\", \"esp_risk\"]\n", + "# esp.dropna(inplace=True)\n", "\n", - "#dflt=pd.read_csv('../../datasets/processed/water_indicators/risk_especific_soy.xyz', sep=\" \", header=None)\n", - "#dflt.columns=[\"X\", \"Y\", \"dflt_risk\"]\n", - "#dflt.dropna(inplace=True)\n", + "# dflt=pd.read_csv('../../datasets/processed/water_indicators/risk_especific_soy.xyz', sep=\" \", header=None)\n", + "# dflt.columns=[\"X\", \"Y\", \"dflt_risk\"]\n", + "# dflt.dropna(inplace=True)\n", "\n", - "prod=pd.read_csv('../../datasets/processed/water_indicators/production_soy.xyz', sep=\" \", header=None)\n", - "prod.columns=[\"X\", \"Y\", \"production_soy\"]\n", - "#prod.dropna(inplace=True)" + "prod = pd.read_csv(\n", + " \"../../datasets/processed/water_indicators/production_soy.xyz\", sep=\" \", header=None\n", + ")\n", + "prod.columns = [\"X\", \"Y\", \"production_soy\"]\n", + "# prod.dropna(inplace=True)" ] }, { @@ -1219,7 +1220,7 @@ } ], "source": [ - "prod[prod['production_soy']>1].describe()" + "prod[prod[\"production_soy\"] > 1].describe()" ] }, { @@ -1325,10 +1326,14 @@ } ], "source": [ - "prod=prod.round({'X':2, 'Y':2})\n", - "diff=diff.round({'X':2, 'Y':2})\n", - "df_combined=prod.merge(diff['difference'], left_index=True, right_index=True, how='inner').merge(esp['esp_risk'], left_index=True, right_index=True, how='inner')\n", - "df_combined_clean=df_combined[(df_combined['production_soy']>1) & (df_combined['difference'] > -10)] \n", + "prod = prod.round({\"X\": 2, \"Y\": 2})\n", + "diff = diff.round({\"X\": 2, \"Y\": 2})\n", + "df_combined = prod.merge(diff[\"difference\"], left_index=True, right_index=True, how=\"inner\").merge(\n", + " esp[\"esp_risk\"], left_index=True, right_index=True, how=\"inner\"\n", + ")\n", + "df_combined_clean = df_combined[\n", + " (df_combined[\"production_soy\"] > 1) & (df_combined[\"difference\"] > -10)\n", + "]\n", "df_combined_clean.head()" ] }, @@ -1568,7 +1573,7 @@ } ], "source": [ - "df_combined_clean[df_combined_clean['difference'] != 0].describe()" + "df_combined_clean[df_combined_clean[\"difference\"] != 0].describe()" ] }, { @@ -1589,7 +1594,7 @@ } ], "source": [ - "df_sampled=df_combined_clean[df_combined_clean['difference'] != 0].sample(frac=0.1)\n", + "df_sampled = df_combined_clean[df_combined_clean[\"difference\"] != 0].sample(frac=0.1)\n", "df_sampled.shape" ] }, @@ -1623,15 +1628,15 @@ } ], "source": [ - "fig, ax = plt.subplots(figsize=[30,15])\n", - "x=df_sampled['difference']\n", - "y=df_sampled['production_soy']\n", - "z=df_sampled['esp_risk']\n", - "plt.scatter(x, y, c=z, cmap=\"coolwarm\", s=z*1500, vmin = -10, vmax=0)\n", - "plt.xlabel('Proportional difference', fontsize = 25)\n", - "plt.ylabel('Production', fontsize = 25)\n", + "fig, ax = plt.subplots(figsize=[30, 15])\n", + "x = df_sampled[\"difference\"]\n", + "y = df_sampled[\"production_soy\"]\n", + "z = df_sampled[\"esp_risk\"]\n", + "plt.scatter(x, y, c=z, cmap=\"coolwarm\", s=z * 1500, vmin=-10, vmax=0)\n", + "plt.xlabel(\"Proportional difference\", fontsize=25)\n", + "plt.ylabel(\"Production\", fontsize=25)\n", "\n", - "#Size represents water risk" + "# Size represents water risk" ] }, { diff --git a/data/notebooks/Lab/9_contextual_IDHI_dataset.ipynb b/data/notebooks/Lab/9_contextual_IDHI_dataset.ipynb index dbc55831f..4d5457209 100644 --- 
a/data/notebooks/Lab/9_contextual_IDHI_dataset.ipynb +++ b/data/notebooks/Lab/9_contextual_IDHI_dataset.ipynb @@ -9,12 +9,9 @@ "source": [ "import csv\n", "\n", - "import geopandas as gpd\n", "import pandas as pd\n", - "import numpy as np\n", "import requests\n", - "from psycopg2.pool import ThreadedConnectionPool\n", - "from shapely.geometry import shape" + "from psycopg2.pool import ThreadedConnectionPool" ] }, { diff --git a/data/notebooks/Lab/BLWF_indicator_coeficients.ipynb b/data/notebooks/Lab/BLWF_indicator_coeficients.ipynb index def2227db..bd3930a16 100644 --- a/data/notebooks/Lab/BLWF_indicator_coeficients.ipynb +++ b/data/notebooks/Lab/BLWF_indicator_coeficients.ipynb @@ -17,10 +17,9 @@ "metadata": {}, "outputs": [], "source": [ - "#import libraries\n", - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt" + "# import libraries\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd" ] }, { @@ -276,9 +275,9 @@ } ], "source": [ - "#open file with blue water footprint for crop products (m3/t)\n", - "file_path = '../../datasets/raw/TRASE_data/WFN/Report47-Appendix/Report47-Appendix-II_clean.xlsx'\n", - "df_c = pd.read_excel(file_path, sheet_name='App-II-WF_perTon')\n", + "# open file with blue water footprint for crop products (m3/t)\n", + "file_path = \"../../datasets/raw/TRASE_data/WFN/Report47-Appendix/Report47-Appendix-II_clean.xlsx\"\n", + "df_c = pd.read_excel(file_path, sheet_name=\"App-II-WF_perTon\")\n", "df_c.head()" ] }, @@ -503,9 +502,9 @@ } ], "source": [ - "#open file with bluw eater footprint for animal products (m3/t)\n", - "file_path = '../../datasets/raw/TRASE_data/WFN/Report48-Appendix-V_clean.xlsx'\n", - "df_a = pd.read_excel(file_path, sheet_name='App-V_WF_HS_SITC')\n", + "# open file with bluw eater footprint for animal products (m3/t)\n", + "file_path = \"../../datasets/raw/TRASE_data/WFN/Report48-Appendix-V_clean.xlsx\"\n", + "df_a = pd.read_excel(file_path, sheet_name=\"App-V_WF_HS_SITC\")\n", "df_a.head()" ] }, @@ -737,21 +736,27 @@ } ], "source": [ - "#try to remove the Grazing and Mixed columns from the animal dataframe\n", - "word1 = 'Mixed'\n", - "word2 = 'Grazing'\n", - "word3 = 'Industrial'\n", + "# try to remove the Grazing and Mixed columns from the animal dataframe\n", + "word1 = \"Mixed\"\n", + "word2 = \"Grazing\"\n", + "word3 = \"Industrial\"\n", "\n", "\n", - "#check if the column index contains missing values\n", + "# check if the column index contains missing values\n", "has_missing_values = df_a.columns.get_level_values(0).isna().any()\n", "\n", "if has_missing_values:\n", - " #replace missing values with an empty string\n", - " df_a.columns = d_a.columns.fillna('')\n", + " # replace missing values with an empty string\n", + " df_a.columns = d_a.columns.fillna(\"\")\n", "\n", - "#apply a boolean mask to remove columns with specific worlds in the header\n", - "df_a = df_a.drop(columns=df_a.columns[df_a.columns.get_level_values(0).str.contains(word1, na=False) | df_a.columns.get_level_values(0).str.contains(word2, na=False) | df_a.columns.get_level_values(0).str.contains(word3, na=False)])\n", + "# apply a boolean mask to remove columns with specific worlds in the header\n", + "df_a = df_a.drop(\n", + " columns=df_a.columns[\n", + " df_a.columns.get_level_values(0).str.contains(word1, na=False)\n", + " | df_a.columns.get_level_values(0).str.contains(word2, na=False)\n", + " | df_a.columns.get_level_values(0).str.contains(word3, na=False)\n", + " ]\n", + ")\n", "df_a.head()" ] }, @@ -969,18 +974,20 
@@ } ], "source": [ - "#Remove the unamed from the crop dataframe\n", - "word = 'Unnamed'\n", + "# Remove the unamed from the crop dataframe\n", + "word = \"Unnamed\"\n", "\n", - "#check if the column index contains missing values\n", + "# check if the column index contains missing values\n", "has_missing_values = df_c.columns.get_level_values(0).isna().any()\n", "\n", "if has_missing_values:\n", - " #replace missing values with an empty string\n", - " df_c.columns = df_c.columns.fillna('')\n", + " # replace missing values with an empty string\n", + " df_c.columns = df_c.columns.fillna(\"\")\n", "\n", - "#apply a boolean mask to remove columns with specific worlds in the header\n", - "df_c = df_c.drop(columns=df_c.columns[df_c.columns.get_level_values(0).str.contains(word, na=False)])\n", + "# apply a boolean mask to remove columns with specific worlds in the header\n", + "df_c = df_c.drop(\n", + " columns=df_c.columns[df_c.columns.get_level_values(0).str.contains(word, na=False)]\n", + ")\n", "df_c.head()" ] }, @@ -1522,7 +1529,7 @@ } ], "source": [ - "concat_df[concat_df['Parent_code']==18]" + "concat_df[concat_df[\"Parent_code\"] == 18]" ] }, { @@ -1626,7 +1633,7 @@ } ], "source": [ - "#open already prepared indicator coeficients csv\n", + "# open already prepared indicator coeficients csv\n", "df_cf = pd.read_csv(\"../../datasets/raw/TRASE_data/WFN/bwfp_indicator_coefficients.csv\")\n", "df_cf.head()" ] @@ -1759,22 +1766,22 @@ "%%time\n", "updated_value = []\n", "for i, row in df_cf.iterrows():\n", - " hs_code = row['hs_2017_code']\n", - " country = row['country']\n", - " #print(f'Searching blwf value for hscode {hs_code} and location equial to {country}...')\n", + " hs_code = row[\"hs_2017_code\"]\n", + " country = row[\"country\"]\n", + " # print(f'Searching blwf value for hscode {hs_code} and location equial to {country}...')\n", " try:\n", - " concat_df_ = concat_df[concat_df['HS_code_simpl'] == hs_code]\n", - " blwf_value = concat_df_[f'{country}'].mean() \n", - " #we don't do the sum of all the childs as the childs are sometimes the same, so for getting the parent we perform just the average\n", - " #blwf_value = concat_df_[f'{country}'].sum() \n", + " concat_df_ = concat_df[concat_df[\"HS_code_simpl\"] == hs_code]\n", + " blwf_value = concat_df_[f\"{country}\"].mean()\n", + " # we don't do the sum of all the childs as the childs are sometimes the same, so for getting the parent we perform just the average\n", + " # blwf_value = concat_df_[f'{country}'].sum()\n", " except:\n", " blwf_value = None\n", " updated_value.append(blwf_value)\n", "\n", - "df_cf['Updated_value'] = updated_value\n", + "df_cf[\"Updated_value\"] = updated_value\n", "\n", "\n", - "df_cf.head() " + "df_cf.head()" ] }, { @@ -1853,9 +1860,9 @@ } ], "source": [ - "#get the parent code so we can sum all the childrens\n", - "parent_df = concat_df[['HS_code_simpl','Parent_code']]\n", - "parent_df = parent_df.rename(columns={'HS_code_simpl':'hs_2017_code'})\n", + "# get the parent code so we can sum all the childrens\n", + "parent_df = concat_df[[\"HS_code_simpl\", \"Parent_code\"]]\n", + "parent_df = parent_df.rename(columns={\"HS_code_simpl\": \"hs_2017_code\"})\n", "parent_df = parent_df.drop_duplicates()\n", "parent_df.head()" ] @@ -1973,8 +1980,8 @@ } ], "source": [ - "#merge the parent values with the blwf dataframe so we know the childrens that we have to sum\n", - "merged_df = df_cf.merge(parent_df, on='hs_2017_code', how = 'left')\n", + "# merge the parent values with the blwf dataframe so we know the 
childrens that we have to sum\n", + "merged_df = df_cf.merge(parent_df, on=\"hs_2017_code\", how=\"left\")\n", "merged_df.head()" ] }, @@ -2009,7 +2016,7 @@ ], "source": [ "# Sum values based on another column\n", - "sum_df = merged_df.groupby(['Parent_code', 'country'])['Updated_value'].sum()\n", + "sum_df = merged_df.groupby([\"Parent_code\", \"country\"])[\"Updated_value\"].sum()\n", "sum_df" ] }, @@ -5609,20 +5616,22 @@ "### search for the parent value and add the total\n", "\n", "for i, row in df_cf.iterrows():\n", - " hs_code = row['hs_2017_code']\n", - " country = row['country']\n", - " #is_null = str(row['Updated_value'])\n", + " hs_code = row[\"hs_2017_code\"]\n", + " country = row[\"country\"]\n", + " # is_null = str(row['Updated_value'])\n", " if len(str(hs_code)) < 3:\n", - " #get the total value for the country and hscode\n", - " filtered_df = total_values[(total_values['country'] == country) & (total_values['Parent_code'] == hs_code)]\n", + " # get the total value for the country and hscode\n", + " filtered_df = total_values[\n", + " (total_values[\"country\"] == country) & (total_values[\"Parent_code\"] == hs_code)\n", + " ]\n", " try:\n", - " parent_value = list(filtered_df['Updated_value'])[0]\n", + " parent_value = list(filtered_df[\"Updated_value\"])[0]\n", " except:\n", - " print(f'No value for {hs_code} and {country}')\n", - " print(list(filtered_df['Updated_value']))\n", + " print(f\"No value for {hs_code} and {country}\")\n", + " print(list(filtered_df[\"Updated_value\"]))\n", " parent_value = None\n", - " df_cf.loc[i, 'Updated_value'] = parent_value\n", - " \n", + " df_cf.loc[i, \"Updated_value\"] = parent_value\n", + "\n", "df_cf.head()" ] }, @@ -5633,7 +5642,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_cf.to_csv('../../datasets/raw/TRASE_data/WFN/updated_blwf_v4.csv')" + "df_cf.to_csv(\"../../datasets/raw/TRASE_data/WFN/updated_blwf_v4.csv\")" ] }, { @@ -5749,8 +5758,8 @@ } ], "source": [ - "#explore the differentce between the old value and the updated one\n", - "df_cf['difference'] = df_cf['value'] - df_cf['Updated_value']\n", + "# explore the differentce between the old value and the updated one\n", + "df_cf[\"difference\"] = df_cf[\"value\"] - df_cf[\"Updated_value\"]\n", "df_cf.head()" ] }, @@ -5867,7 +5876,7 @@ } ], "source": [ - "error_greater_than_1 = df_cf[df_cf['difference']>1]\n", + "error_greater_than_1 = df_cf[df_cf[\"difference\"] > 1]\n", "error_greater_than_1.head()" ] }, @@ -5924,7 +5933,7 @@ } ], "source": [ - "error_greater_than_1[error_greater_than_1['hs_2017_code']==9]" + "error_greater_than_1[error_greater_than_1[\"hs_2017_code\"] == 9]" ] }, { @@ -5980,8 +5989,9 @@ } ], "source": [ - "\n", - "error_greater_than_1[(error_greater_than_1['Updated_value']>3349) & (error_greater_than_1['Updated_value']<3350)]" + "error_greater_than_1[\n", + " (error_greater_than_1[\"Updated_value\"] > 3349) & (error_greater_than_1[\"Updated_value\"] < 3350)\n", + "]" ] }, { @@ -6002,7 +6012,7 @@ } ], "source": [ - "len(error_greater_than_1)/len(df_cf)" + "len(error_greater_than_1) / len(df_cf)" ] }, { @@ -6025,13 +6035,11 @@ } ], "source": [ - "\n", - "\n", "# Plot the difference column\n", - "plt.plot(df_cf['difference'])\n", - "plt.xlabel('Index')\n", - "plt.ylabel('Difference')\n", - "plt.title('Difference between value and updated value')\n", + "plt.plot(df_cf[\"difference\"])\n", + "plt.xlabel(\"Index\")\n", + "plt.ylabel(\"Difference\")\n", + "plt.title(\"Difference between value and updated value\")\n", "plt.show()" ] }, @@ -6140,9 +6148,8 @@ } ], 
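The reformatted cells above roll child HS codes up to their two-digit parent by summing the per-country values. A minimal sketch of that groupby on an invented frame; the column names follow the notebook, the rows are made up.

import pandas as pd

merged_df = pd.DataFrame(
    {
        "hs_2017_code": ["0901", "0902", "0901", "0902"],
        "Parent_code": ["09", "09", "09", "09"],
        "country": ["Bulgaria", "Bulgaria", "Global", "Global"],
        "Updated_value": [100.0, 250.0, 80.0, 120.0],
    }
)

# sum every child of a parent code within each country; the totals are later written
# back onto the two-digit parent rows instead of a direct lookup
sum_df = merged_df.groupby(["Parent_code", "country"])["Updated_value"].sum()
print(sum_df)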
"source": [ - "\n", - "df_cf_clean = df_cf[['name','hs_2017_code', 'country', 'Updated_value']]\n", - "df_cf_clean = df_cf_clean.rename(columns={'Updated_value':'value'})\n", + "df_cf_clean = df_cf[[\"name\", \"hs_2017_code\", \"country\", \"Updated_value\"]]\n", + "df_cf_clean = df_cf_clean.rename(columns={\"Updated_value\": \"value\"})\n", "df_cf_clean.head()" ] }, @@ -6160,17 +6167,17 @@ "\n", "new_hs_code_str = []\n", "for i, row in df_cf_clean.iterrows():\n", - " hs_code = row['hs_2017_code']\n", - " \n", + " hs_code = row[\"hs_2017_code\"]\n", + "\n", " if len(str(hs_code)) == 1 or len(str(hs_code)) == 3:\n", - " new_hs_code = '0'+str(hs_code)\n", + " new_hs_code = \"0\" + str(hs_code)\n", " else:\n", " new_hs_code = str(hs_code)\n", - " \n", - " new_hs_code_str.append(new_hs_code) \n", - " \n", "\n", - "df_cf_clean['hs_code_updated_str'] = new_hs_code_str" + " new_hs_code_str.append(new_hs_code)\n", + "\n", + "\n", + "df_cf_clean[\"hs_code_updated_str\"] = new_hs_code_str" ] }, { @@ -6268,10 +6275,10 @@ } ], "source": [ - "#update csv with str hscode to improve the match\n", - "df_cf_clean_v2 = df_cf_clean[['name', 'hs_code_updated_str','country','value']]\n", - "#rename the column\n", - "df_cf_clean_v2 = df_cf_clean_v2.rename(columns={'hs_code_updated_str':'hs_2017_code'})\n", + "# update csv with str hscode to improve the match\n", + "df_cf_clean_v2 = df_cf_clean[[\"name\", \"hs_code_updated_str\", \"country\", \"value\"]]\n", + "# rename the column\n", + "df_cf_clean_v2 = df_cf_clean_v2.rename(columns={\"hs_code_updated_str\": \"hs_2017_code\"})\n", "df_cf_clean_v2.head()" ] }, @@ -6282,8 +6289,10 @@ "metadata": {}, "outputs": [], "source": [ - "#export and test ingestion\n", - "df_cf_clean_v2.to_csv(\"../../datasets/raw/TRASE_data/WFN/bwfp_indicator_coefficients_updated_v4.csv\")" + "# export and test ingestion\n", + "df_cf_clean_v2.to_csv(\n", + " \"../../datasets/raw/TRASE_data/WFN/bwfp_indicator_coefficients_updated_v4.csv\"\n", + ")" ] }, { @@ -6387,8 +6396,8 @@ } ], "source": [ - "dtype_mapping = {'hs_2017_code': str} \n", - "file = '../../datasets/raw/TRASE_data/WFN/bwfp_indicator_coefficients_updated_v4.csv'\n", + "dtype_mapping = {\"hs_2017_code\": str}\n", + "file = \"../../datasets/raw/TRASE_data/WFN/bwfp_indicator_coefficients_updated_v4.csv\"\n", "df = pd.read_csv(file, dtype=dtype_mapping)\n", "df.head()" ] @@ -6410,8 +6419,8 @@ "metadata": {}, "outputs": [], "source": [ - "#get the list of countries that have been included in the original blwf dataframe\n", - "original_admins = set(list(df['country']))" + "# get the list of countries that have been included in the original blwf dataframe\n", + "original_admins = set(list(df[\"country\"]))" ] }, { @@ -6421,7 +6430,7 @@ "metadata": {}, "outputs": [], "source": [ - "#get the list of all the countries that exist in the original dataframe\n", + "# get the list of all the countries that exist in the original dataframe\n", "all_admins = set(list(df_c[5:]))" ] }, @@ -6432,7 +6441,7 @@ "metadata": {}, "outputs": [], "source": [ - "#identify the ones from all admins that havent been include on the blwf csv and that need to be included\n", + "# identify the ones from all admins that havent been include on the blwf csv and that need to be included\n", "admin_regions_not_in_file = all_admins - original_admins" ] }, @@ -6462,18 +6471,17 @@ } ], "source": [ - "\n", - "#create a list of dataframes with the new admin regions to contact\n", + "# create a list of dataframes with the new admin regions to contact\n", "list_df_ = []\n", 
"sorted_list = sorted(admin_regions_not_in_file)\n", "for index, admin in enumerate(sorted_list):\n", - " df_global = df_cf_clean_v2[df_cf_clean_v2['country']=='Global']\n", - " df_global['country']=admin\n", - " df_global['value']=0\n", - " \n", + " df_global = df_cf_clean_v2[df_cf_clean_v2[\"country\"] == \"Global\"]\n", + " df_global[\"country\"] = admin\n", + " df_global[\"value\"] = 0\n", + "\n", " df_ = df_global\n", - " \n", - " list_df_.append(df_)\n" + "\n", + " list_df_.append(df_)" ] }, { @@ -6571,7 +6579,7 @@ } ], "source": [ - "#concat the list of dataframes with the new admins\n", + "# concat the list of dataframes with the new admins\n", "new_admins_df_ = pd.concat(list_df_)\n", "new_admins_df_.head()" ] @@ -6797,16 +6805,16 @@ } ], "source": [ - "#add the leading 0 to the simplied hs code so we can work with the same hscode index\n", + "# add the leading 0 to the simplied hs code so we can work with the same hscode index\n", "updated_hscode = []\n", - "for i,row in concat_df.iterrows():\n", - " hs_code = str(row['HS_code_simpl'])\n", - " if (len(hs_code)) ==1 or (len(hs_code)==3):\n", - " updated_hs_code = \"0\"+hs_code\n", + "for i, row in concat_df.iterrows():\n", + " hs_code = str(row[\"HS_code_simpl\"])\n", + " if (len(hs_code)) == 1 or (len(hs_code) == 3):\n", + " updated_hs_code = \"0\" + hs_code\n", " else:\n", - " updated_hs_code =hs_code\n", + " updated_hs_code = hs_code\n", " updated_hscode.append(updated_hs_code)\n", - "concat_df['updated_hscode'] = updated_hscode\n", + "concat_df[\"updated_hscode\"] = updated_hscode\n", "concat_df.head()" ] }, @@ -6914,20 +6922,20 @@ ], "source": [ "%%time\n", - "#get the value for each admin region from the original file\n", + "# get the value for each admin region from the original file\n", "updated_value = []\n", "for i, row in new_admins_df_.iterrows():\n", - " hs_code = row['hs_2017_code']\n", - " country = row['country']\n", + " hs_code = row[\"hs_2017_code\"]\n", + " country = row[\"country\"]\n", " try:\n", - " concat_df_ = concat_df[concat_df['updated_hscode'] == hs_code]\n", - " blwf_value = concat_df_[f'{country}'].mean() \n", + " concat_df_ = concat_df[concat_df[\"updated_hscode\"] == hs_code]\n", + " blwf_value = concat_df_[f\"{country}\"].mean()\n", " except:\n", " blwf_value = None\n", - " updated_value.append(blwf_value) \n", - " \n", - "new_admins_df_['value'] = updated_value\n", - "new_admins_df_.head() " + " updated_value.append(blwf_value)\n", + "\n", + "new_admins_df_[\"value\"] = updated_value\n", + "new_admins_df_.head()" ] }, { @@ -6989,8 +6997,10 @@ } ], "source": [ - "#double check with local file that the values are correct\n", - "new_admins_df_[(new_admins_df_['country']=='Badakhshan') & (new_admins_df_['hs_2017_code']=='0910')]" + "# double check with local file that the values are correct\n", + "new_admins_df_[\n", + " (new_admins_df_[\"country\"] == \"Badakhshan\") & (new_admins_df_[\"hs_2017_code\"] == \"0910\")\n", + "]" ] }, { @@ -7094,9 +7104,9 @@ } ], "source": [ - "#add the parent code to the new admins dataframe so we can sum the childrens to get the parent value\n", - "new_admins_df_['Parent_code'] = [el[:2] for el in new_admins_df_['hs_2017_code']]\n", - "new_admins_df_.head()\n" + "# add the parent code to the new admins dataframe so we can sum the childrens to get the parent value\n", + "new_admins_df_[\"Parent_code\"] = [el[:2] for el in new_admins_df_[\"hs_2017_code\"]]\n", + "new_admins_df_.head()" ] }, { @@ -7260,8 +7270,10 @@ } ], "source": [ - "#double check that the parent values 
are correct\n", - "new_admins_df_[(new_admins_df_['Parent_code']=='09') & (new_admins_df_['country']=='Abengourou')]" + "# double check that the parent values are correct\n", + "new_admins_df_[\n", + " (new_admins_df_[\"Parent_code\"] == \"09\") & (new_admins_df_[\"country\"] == \"Abengourou\")\n", + "]" ] }, { @@ -7294,8 +7306,8 @@ } ], "source": [ - "#group by country and parent code to get totals by parent\n", - "sum_df = new_admins_df_.groupby(['Parent_code', 'country'])['value'].sum()\n", + "# group by country and parent code to get totals by parent\n", + "sum_df = new_admins_df_.groupby([\"Parent_code\", \"country\"])[\"value\"].sum()\n", "sum_df" ] }, @@ -7426,7 +7438,7 @@ } ], "source": [ - "#create a dataframe with the parent values so we can join with the new admin areas\n", + "# create a dataframe with the parent values so we can join with the new admin areas\n", "total_values = pd.DataFrame(sum_df)\n", "total_values = total_values.reset_index()\n", "total_values" @@ -7486,8 +7498,8 @@ } ], "source": [ - "#double check the value\n", - "total_values[(total_values['country']=='Abengourou')& (total_values['Parent_code']=='09')]" + "# double check the value\n", + "total_values[(total_values[\"country\"] == \"Abengourou\") & (total_values[\"Parent_code\"] == \"09\")]" ] }, { @@ -7653,8 +7665,14 @@ } ], "source": [ - "#merge the new admins dataframe with the parent totals to get the parent value\n", - "new_admins_updated = pd.merge(new_admins_df_, total_values,how='left', left_on = ['hs_2017_code', 'country'], right_on = ['Parent_code', 'country'])\n", + "# merge the new admins dataframe with the parent totals to get the parent value\n", + "new_admins_updated = pd.merge(\n", + " new_admins_df_,\n", + " total_values,\n", + " how=\"left\",\n", + " left_on=[\"hs_2017_code\", \"country\"],\n", + " right_on=[\"Parent_code\", \"country\"],\n", + ")\n", "new_admins_updated.head()" ] }, @@ -7779,12 +7797,12 @@ "source": [ "updated_value = []\n", "for i, row in new_admins_updated.iterrows():\n", - " if len(row['hs_2017_code'])==2:\n", - " value = row['value_y']\n", + " if len(row[\"hs_2017_code\"]) == 2:\n", + " value = row[\"value_y\"]\n", " else:\n", - " value = row['value_x']\n", + " value = row[\"value_x\"]\n", " updated_value.append(value)\n", - "new_admins_updated['updated_value'] = updated_value\n", + "new_admins_updated[\"updated_value\"] = updated_value\n", "new_admins_updated.head()" ] }, @@ -7998,8 +8016,10 @@ } ], "source": [ - "#double check\n", - "new_admins_updated[(new_admins_updated['Parent_code_x']=='09') & (new_admins_updated['country']=='Abengourou')]\n" + "# double check\n", + "new_admins_updated[\n", + " (new_admins_updated[\"Parent_code_x\"] == \"09\") & (new_admins_updated[\"country\"] == \"Abengourou\")\n", + "]" ] }, { @@ -8097,10 +8117,10 @@ } ], "source": [ - "#clean file\n", + "# clean file\n", "\n", - "new_admins_updated = new_admins_updated[['name', 'hs_2017_code','country', 'updated_value']]\n", - "new_admins_updated = new_admins_updated.rename(columns={'updated_value':'value'})\n", + "new_admins_updated = new_admins_updated[[\"name\", \"hs_2017_code\", \"country\", \"updated_value\"]]\n", + "new_admins_updated = new_admins_updated.rename(columns={\"updated_value\": \"value\"})\n", "new_admins_updated.head()" ] }, @@ -8160,7 +8180,9 @@ } ], "source": [ - "new_admins_updated[(new_admins_updated['hs_2017_code']=='09') & (new_admins_updated['country']=='Abengourou')]" + "new_admins_updated[\n", + " (new_admins_updated[\"hs_2017_code\"] == \"09\") & 
(new_admins_updated[\"country\"] == \"Abengourou\")\n", + "]" ] }, { @@ -8258,7 +8280,7 @@ } ], "source": [ - "#concat with the previous country file\n", + "# concat with the previous country file\n", "df_cf_clean_v2.head()" ] }, @@ -8357,7 +8379,7 @@ } ], "source": [ - "blwf_updated_admins = pd.concat([df_cf_clean_v2,new_admins_updated])\n", + "blwf_updated_admins = pd.concat([df_cf_clean_v2, new_admins_updated])\n", "blwf_updated_admins.head()" ] }, @@ -8417,8 +8439,10 @@ } ], "source": [ - "#double check\n", - "blwf_updated_admins[(blwf_updated_admins['hs_2017_code']=='09') & (blwf_updated_admins['country']=='Bulgaria')]" + "# double check\n", + "blwf_updated_admins[\n", + " (blwf_updated_admins[\"hs_2017_code\"] == \"09\") & (blwf_updated_admins[\"country\"] == \"Bulgaria\")\n", + "]" ] }, { @@ -8428,7 +8452,9 @@ "metadata": {}, "outputs": [], "source": [ - "blwf_updated_admins.to_csv(\"../../datasets/raw/TRASE_data/WFN/bwfp_updates_csv/updated_v6_admins/bwfp_indicator_coefficients_updated_admins_v6.csv\")" + "blwf_updated_admins.to_csv(\n", + " \"../../datasets/raw/TRASE_data/WFN/bwfp_updates_csv/updated_v6_admins/bwfp_indicator_coefficients_updated_admins_v6.csv\"\n", + ")" ] }, { diff --git a/data/notebooks/Lab/Excel_validation_logic.ipynb b/data/notebooks/Lab/Excel_validation_logic.ipynb index 54a0fc82e..da99b1cdd 100644 --- a/data/notebooks/Lab/Excel_validation_logic.ipynb +++ b/data/notebooks/Lab/Excel_validation_logic.ipynb @@ -31,11 +31,12 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", + "import re\n", + "\n", "import numpy as np\n", + "import pandas as pd\n", "import pandera as pa\n", - "from pandera.typing import Series\n", - "import re" + "from pandera.typing import Series" ] }, { @@ -263,12 +264,14 @@ } ], "source": [ - "sheet_url = \"https://docs.google.com/spreadsheets/d/16sQlhPXGaFpDPi_QWDsUCTZVJQMl9C8z6_KFJoBUR1Y/edit#gid=0\"\n", - "url = sheet_url.replace('/edit#gid=', '/export?format=csv&gid=')\n", + "sheet_url = (\n", + " \"https://docs.google.com/spreadsheets/d/16sQlhPXGaFpDPi_QWDsUCTZVJQMl9C8z6_KFJoBUR1Y/edit#gid=0\"\n", + ")\n", + "url = sheet_url.replace(\"/edit#gid=\", \"/export?format=csv&gid=\")\n", "df = pd.read_csv(url)\n", "\n", - "df.columns = df.iloc[2].str.lower().str.strip().str.replace(' ', '_')\n", - "df = df.rename(columns = lambda x: re.sub('_\\(º[en]\\)', '', x))\n", + "df.columns = df.iloc[2].str.lower().str.strip().str.replace(\" \", \"_\")\n", + "df = df.rename(columns=lambda x: re.sub(\"_\\(º[en]\\)\", \"\", x))\n", "df = df.drop([0, 1, 2])\n", "\n", "df.head()" @@ -295,17 +298,34 @@ "outputs": [], "source": [ "class data_validation(pa.SchemaModel):\n", - " material: Series[str] = pa.Field(str_matches= \"[A-Za-z]*\", allow_duplicates=True, nullable=False)\n", - " business_unit: Series[str] = pa.Field(str_matches= \"[A-Za-z]*\", allow_duplicates=True, nullable=False)\n", - " location_type: Series[str] = pa.Field(str_matches= \"[A-Za-z]*\", allow_duplicates=True, nullable=False)\n", - " country: Series[str] = pa.Field(str_matches= \"[A-Za-z]*\", allow_duplicates=True, nullable=False)\n", - " tons: Series[int] = pa.Field(alias ='(.*_tons)', nullable=False, allow_duplicates=True, regex=True, coerce=True, in_range={\"min_value\": 0, \"max_value\": np.iinfo(np.int32).max})\n", - " latitude: Series[float] = pa.Field(nullable=True, allow_duplicates=True, coerce=True, in_range={\"min_value\": -90, \"max_value\": 90})\n", - " longitude: Series[float] = pa.Field(nullable=True, allow_duplicates=True, coerce=True, 
in_range={\"min_value\": -180, \"max_value\": 180})\n", - "\n", - " \n", - " \n", - " " + " material: Series[str] = pa.Field(str_matches=\"[A-Za-z]*\", allow_duplicates=True, nullable=False)\n", + " business_unit: Series[str] = pa.Field(\n", + " str_matches=\"[A-Za-z]*\", allow_duplicates=True, nullable=False\n", + " )\n", + " location_type: Series[str] = pa.Field(\n", + " str_matches=\"[A-Za-z]*\", allow_duplicates=True, nullable=False\n", + " )\n", + " country: Series[str] = pa.Field(str_matches=\"[A-Za-z]*\", allow_duplicates=True, nullable=False)\n", + " tons: Series[int] = pa.Field(\n", + " alias=\"(.*_tons)\",\n", + " nullable=False,\n", + " allow_duplicates=True,\n", + " regex=True,\n", + " coerce=True,\n", + " in_range={\"min_value\": 0, \"max_value\": np.iinfo(np.int32).max},\n", + " )\n", + " latitude: Series[float] = pa.Field(\n", + " nullable=True,\n", + " allow_duplicates=True,\n", + " coerce=True,\n", + " in_range={\"min_value\": -90, \"max_value\": 90},\n", + " )\n", + " longitude: Series[float] = pa.Field(\n", + " nullable=True,\n", + " allow_duplicates=True,\n", + " coerce=True,\n", + " in_range={\"min_value\": -180, \"max_value\": 180},\n", + " )" ] }, { @@ -551,37 +571,45 @@ "source": [ "def location_validation(df):\n", " for l in range(len(df)):\n", - " if 'country' in df.iloc[l]['location_type'].lower():\n", - " if not pd.isna(df.iloc[l]['address']) or not pd.isna(df.iloc[l]['latitude']) or not pd.isna(df.iloc[l]['longitude']):\n", - " print(f'Location entry {l+1}: WARNING location type can be updated')\n", + " if \"country\" in df.iloc[l][\"location_type\"].lower():\n", + " if (\n", + " not pd.isna(df.iloc[l][\"address\"])\n", + " or not pd.isna(df.iloc[l][\"latitude\"])\n", + " or not pd.isna(df.iloc[l][\"longitude\"])\n", + " ):\n", + " print(f\"Location entry {l+1}: WARNING location type can be updated\")\n", " else:\n", - " e=0 \n", - " \n", - " if 'unknown' in df.iloc[l]['location_type'].lower():\n", - " if not pd.isna(df.iloc[l]['address']) or not pd.isna(df.iloc[l]['latitude']) or not pd.isna(df.iloc[l]['longitude']):\n", - " print(f'Location entry {l+1}: WARNING location type can be updated')\n", + " e = 0\n", + "\n", + " if \"unknown\" in df.iloc[l][\"location_type\"].lower():\n", + " if (\n", + " not pd.isna(df.iloc[l][\"address\"])\n", + " or not pd.isna(df.iloc[l][\"latitude\"])\n", + " or not pd.isna(df.iloc[l][\"longitude\"])\n", + " ):\n", + " print(f\"Location entry {l+1}: WARNING location type can be updated\")\n", " else:\n", - " e=0 \n", - " \n", - " if 'point' in df.iloc[l]['location_type'].lower():\n", - " if pd.isna(df.iloc[l]['address']):\n", - " if pd.isna(df.iloc[l]['latitude']) or pd.isna(df.iloc[l]['longitude']):\n", - " print(f'LOCATION ERROR ON ENTRY {l+1}: address or latitude/longitude REQUIRED')\n", + " e = 0\n", + "\n", + " if \"point\" in df.iloc[l][\"location_type\"].lower():\n", + " if pd.isna(df.iloc[l][\"address\"]):\n", + " if pd.isna(df.iloc[l][\"latitude\"]) or pd.isna(df.iloc[l][\"longitude\"]):\n", + " print(f\"LOCATION ERROR ON ENTRY {l+1}: address or latitude/longitude REQUIRED\")\n", " else:\n", - " e=0\n", + " e = 0\n", " else:\n", - " e=0 \n", - " if 'facility' in df.iloc[l]['location_type'].lower():\n", - " if pd.isna(df.iloc[l]['address']):\n", - " if pd.isna(df.iloc[l]['latitude']) or pd.isna(df.iloc[l]['longitude']):\n", - " print(f'LOCATION ERROR ON ENTRY {l+1}: address or latitude/longitude REQUIRED') \n", + " e = 0\n", + " if \"facility\" in df.iloc[l][\"location_type\"].lower():\n", + " if 
pd.isna(df.iloc[l][\"address\"]):\n", + " if pd.isna(df.iloc[l][\"latitude\"]) or pd.isna(df.iloc[l][\"longitude\"]):\n", + " print(f\"LOCATION ERROR ON ENTRY {l+1}: address or latitude/longitude REQUIRED\")\n", " else:\n", - " e=0\n", + " e = 0\n", " else:\n", - " e=0 \n", - " \n", + " e = 0\n", + "\n", " if e == 0:\n", - " print(f'Location entry {l+1}: OK') " + " print(f\"Location entry {l+1}: OK\")" ] }, { @@ -786,10 +814,10 @@ } ], "source": [ - "df_invalid.iloc[1]['address'] = 'Fake street'\n", - "df_invalid.iloc[14]['address'] = np.nan\n", - "df_invalid.iloc[20]['latitude'] = np.nan\n", - "#df_invalid.head(21)\n", + "df_invalid.iloc[1][\"address\"] = \"Fake street\"\n", + "df_invalid.iloc[14][\"address\"] = np.nan\n", + "df_invalid.iloc[20][\"latitude\"] = np.nan\n", + "# df_invalid.head(21)\n", "\n", "location_validation(df_invalid)" ] diff --git a/data/notebooks/Lab/FG_h3_indictator_calc_sql.ipynb b/data/notebooks/Lab/FG_h3_indictator_calc_sql.ipynb index b9a7dad0a..2c1d4266b 100644 --- a/data/notebooks/Lab/FG_h3_indictator_calc_sql.ipynb +++ b/data/notebooks/Lab/FG_h3_indictator_calc_sql.ipynb @@ -29,11 +29,9 @@ "outputs": [], "source": [ "# import libraries\n", - "from psycopg2.pool import ThreadedConnectionPool\n", "\n", "import pandas as pd\n", - "from tqdm import tqdm\n", - "import json" + "from psycopg2.pool import ThreadedConnectionPool" ] }, { @@ -43,7 +41,7 @@ "metadata": {}, "outputs": [], "source": [ - "#set env\n", + "# set env\n", "## env file for gcs upload\n", "env_path = \".env\"\n", "with open(env_path) as f:\n", @@ -52,20 +50,20 @@ " env_key, _val = line.split(\"=\", 1)\n", " env_value = _val.split(\"\\n\")[0]\n", " env[env_key] = env_value\n", - " \n", - "#list(env.keys())\n", + "\n", + "# list(env.keys())\n", "\n", "# set conexion to local ddbb\n", "postgres_thread_pool = ThreadedConnectionPool(\n", - " 1, \n", + " 1,\n", " 50,\n", - " host=env['API_POSTGRES_HOST'],\n", - " port=env['API_POSTGRES_PORT'],\n", - " user=env['API_POSTGRES_USERNAME'],\n", - " password=env['API_POSTGRES_PASSWORD']\n", + " host=env[\"API_POSTGRES_HOST\"],\n", + " port=env[\"API_POSTGRES_PORT\"],\n", + " user=env[\"API_POSTGRES_USERNAME\"],\n", + " password=env[\"API_POSTGRES_PASSWORD\"],\n", ")\n", "\n", - "#get list of sourcing records to iterate:\n", + "# get list of sourcing records to iterate:\n", "conn = postgres_thread_pool.getconn()\n", "cursor = conn.cursor()" ] @@ -118,7 +116,7 @@ " return cursor.fetchall()\n", " except Exception as e:\n", " conn.rollback()\n", - " print(e)\n" + " print(e)" ] }, { @@ -138,7 +136,9 @@ "LANGUAGE SQL;\n", "\"\"\"\n", "\n", - "SQL_SUM_H3_GRID_OVER_GEO_REGION = SQL_GET_H3_UNCOMPACT_GEO_REGION+\"\"\"\n", + "SQL_SUM_H3_GRID_OVER_GEO_REGION = (\n", + " SQL_GET_H3_UNCOMPACT_GEO_REGION\n", + " + \"\"\"\n", "CREATE OR REPLACE FUNCTION sum_h3_grid_over_georegion(\n", " geo_region_id uuid, \n", " h3_resolution int,\n", @@ -163,8 +163,11 @@ "$$\n", "LANGUAGE plpgsql;\n", "\"\"\"\n", + ")\n", "\n", - "SQL_SUMPROD_H3_GRIDS_OVER_GEOREGION = SQL_GET_H3_UNCOMPACT_GEO_REGION+\"\"\"\n", + "SQL_SUMPROD_H3_GRIDS_OVER_GEOREGION = (\n", + " SQL_GET_H3_UNCOMPACT_GEO_REGION\n", + " + \"\"\"\n", "CREATE OR REPLACE FUNCTION sumprod_h3_grids_over_georegion(\n", " geo_region_id uuid,\n", " h3_resolution int,\n", @@ -192,6 +195,7 @@ "$$\n", "LANGUAGE plpgsql;\n", "\"\"\"\n", + ")\n", "\n", "SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL = \"\"\"\n", "CREATE OR REPLACE FUNCTION get_h3_table_column_for_material(material_id uuid, h3_data_type material_to_h3_type_enum)\n", @@ -207,7 
+211,10 @@ "LANGUAGE SQL;\n", "\"\"\"\n", "\n", - "SQL_SUM_MATERIAL_OVER_GEO_REGION = SQL_SUM_H3_GRID_OVER_GEO_REGION+SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL+\"\"\"\n", + "SQL_SUM_MATERIAL_OVER_GEO_REGION = (\n", + " SQL_SUM_H3_GRID_OVER_GEO_REGION\n", + " + SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL\n", + " + \"\"\"\n", "CREATE OR REPLACE FUNCTION sum_material_over_georegion(\n", " geo_region_id uuid, \n", " material_id uuid,\n", @@ -234,6 +241,7 @@ "$$\n", "LANGUAGE plpgsql;\n", "\"\"\"\n", + ")\n", "\n", "SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION = \"\"\"\n", "CREATE OR REPLACE FUNCTION sum_weighted_deforestation_over_georegion(\n", @@ -996,11 +1004,11 @@ "source": [ "%%time\n", "sourcing_locations = pd.read_sql_query(\n", - " SQL_SUM_MATERIAL_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_CARBON_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_WATER_OVER_GEO_REGION \\\n", + " SQL_SUM_MATERIAL_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_CARBON_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_WATER_OVER_GEO_REGION\n", " + \"\"\"\n", " SELECT\n", " id,\n", @@ -1012,7 +1020,9 @@ " sum_weighted_water_over_georegion(\"geoRegionId\") as raw_water\n", " FROM\n", " sourcing_location\n", - " \"\"\", conn)\n", + " \"\"\",\n", + " conn,\n", + ")\n", "\n", "sourcing_locations" ] @@ -1402,11 +1412,11 @@ ], "source": [ "sourcing_records = pd.read_sql_query(\n", - " SQL_SUM_MATERIAL_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_CARBON_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_WATER_OVER_GEO_REGION \\\n", + " SQL_SUM_MATERIAL_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_CARBON_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_WATER_OVER_GEO_REGION\n", " + \"\"\"\n", " SELECT\n", " sr.id,\n", @@ -1435,26 +1445,42 @@ " sourcing_location\n", " ) as sl\n", " on sr.\"sourcingLocationId\" = sl.id\n", - "\"\"\", conn)\n", + "\"\"\",\n", + " conn,\n", + ")\n", "\n", - "sourcing_records['land_per_ton'] = sourcing_records['harvested_area'] / sourcing_records['production']\n", + "sourcing_records[\"land_per_ton\"] = (\n", + " sourcing_records[\"harvested_area\"] / sourcing_records[\"production\"]\n", + ")\n", "\n", - "sourcing_records['deforestation_per_ha_landuse'] = sourcing_records['raw_deforestation'] / sourcing_records['harvested_area']\n", - "sourcing_records['bio_per_ha_landuse'] = sourcing_records['raw_biodiversity'] / sourcing_records['harvested_area']\n", - "sourcing_records['carbon_per_ha_landuse'] = sourcing_records['raw_carbon'] / sourcing_records['harvested_area']\n", - "sourcing_records['land_use'] = sourcing_records['land_per_ton'] * sourcing_records['tonnage']\n", + "sourcing_records[\"deforestation_per_ha_landuse\"] = (\n", + " sourcing_records[\"raw_deforestation\"] / sourcing_records[\"harvested_area\"]\n", + ")\n", + "sourcing_records[\"bio_per_ha_landuse\"] = (\n", + " sourcing_records[\"raw_biodiversity\"] / sourcing_records[\"harvested_area\"]\n", + ")\n", + "sourcing_records[\"carbon_per_ha_landuse\"] = (\n", + " sourcing_records[\"raw_carbon\"] / sourcing_records[\"harvested_area\"]\n", + ")\n", + "sourcing_records[\"land_use\"] = 
sourcing_records[\"land_per_ton\"] * sourcing_records[\"tonnage\"]\n", "\n", - "sourcing_records['deforestation'] = sourcing_records['deforestation_per_ha_landuse'] * sourcing_records['land_use']\n", - "sourcing_records['biodiversity_loss'] = sourcing_records['bio_per_ha_landuse'] * sourcing_records['land_use']\n", - "sourcing_records['carbon_loss'] = sourcing_records['carbon_per_ha_landuse'] * sourcing_records['land_use']\n", - "sourcing_records['water_impact'] = sourcing_records['raw_water'] * sourcing_records['tonnage']\n", + "sourcing_records[\"deforestation\"] = (\n", + " sourcing_records[\"deforestation_per_ha_landuse\"] * sourcing_records[\"land_use\"]\n", + ")\n", + "sourcing_records[\"biodiversity_loss\"] = (\n", + " sourcing_records[\"bio_per_ha_landuse\"] * sourcing_records[\"land_use\"]\n", + ")\n", + "sourcing_records[\"carbon_loss\"] = (\n", + " sourcing_records[\"carbon_per_ha_landuse\"] * sourcing_records[\"land_use\"]\n", + ")\n", + "sourcing_records[\"water_impact\"] = sourcing_records[\"raw_water\"] * sourcing_records[\"tonnage\"]\n", "\n", "# Farm impact scaler = production\n", "# Land use change impact scaler = harvested_area\n", "\n", - "sourcing_records.to_csv('test_impact_calc.csv')\n", + "sourcing_records.to_csv(\"test_impact_calc.csv\")\n", "\n", - "sourcing_records\n" + "sourcing_records" ] }, { @@ -42106,9 +42132,10 @@ } ], "source": [ - "query1 = SQL_SUM_MATERIAL_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION \\\n", + "query1 = (\n", + " SQL_SUM_MATERIAL_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION\n", " + \"\"\"\n", "EXPLAIN ANALYZE \n", "SELECT \n", @@ -42116,6 +42143,7 @@ " sum_weighted_deforestation_over_georegion('68ed9c70-0f01-495f-9a53-68e5cb35c7ca', '0d7b1be5-dc86-47b8-ba3a-25190a275011', 'harvest'),\n", " sum_weighted_bio_over_georegion('68ed9c70-0f01-495f-9a53-68e5cb35c7ca', '0d7b1be5-dc86-47b8-ba3a-25190a275011', 'harvest')\n", "\"\"\"\n", + ")\n", "print(psql(query1))\n", "\n", "\"\"\"\n", @@ -42128,10 +42156,10 @@ " INNER JOIN h3_grid_earthstat2000_global_prod prod\n", " on geom.h3index = prod.h3index\n", "\"\"\"\n", - "#print(psql(query2))\n", + "# print(psql(query2))\n", "\n", "%timeit psql(\"SELECT sum_material_over_georegion('68ed9c70-0f01-495f-9a53-68e5cb35c7ca', '0d7b1be5-dc86-47b8-ba3a-25190a275011', 'producer')\")\n", - "%timeit psql(\"\"\"SELECT sum(prod.\"earthstat2000GlobalRubberProduction\") as value FROM (SELECT h3_uncompact(geo_region.\"h3Compact\"::h3index[], 6) h3index FROM geo_region WHERE geo_region.id = '68ed9c70-0f01-495f-9a53-68e5cb35c7ca') geom INNER JOIN h3_grid_earthstat2000_global_prod prod on geom.h3index = prod.h3index\"\"\")\n" + "%timeit psql(\"\"\"SELECT sum(prod.\"earthstat2000GlobalRubberProduction\") as value FROM (SELECT h3_uncompact(geo_region.\"h3Compact\"::h3index[], 6) h3index FROM geo_region WHERE geo_region.id = '68ed9c70-0f01-495f-9a53-68e5cb35c7ca') geom INNER JOIN h3_grid_earthstat2000_global_prod prod on geom.h3index = prod.h3index\"\"\")" ] }, { diff --git a/data/notebooks/Lab/QA_ ingested_values.ipynb b/data/notebooks/Lab/QA_ ingested_values.ipynb index a506a488c..ce3ece098 100644 --- a/data/notebooks/Lab/QA_ ingested_values.ipynb +++ b/data/notebooks/Lab/QA_ ingested_values.ipynb @@ -44,23 +44,23 @@ "source": [ "## import libraries\n", "# import libraries\n", - "from psycopg2.pool import ThreadedConnectionPool\n", - "\n", - "import 
pandas as pd\n", - "import geopandas as gpd\n", - "#from tqdm import tqdm\n", - "import urllib.request \n", "import os\n", + "\n", + "# from tqdm import tqdm\n", + "import urllib.request\n", "from zipfile import ZipFile\n", - "#import json\n", "\n", - "#import h3\n", - "#import h3pandas\n", - "from h3ronpy import raster\n", + "import geopandas as gpd\n", + "import pandas as pd\n", "import rasterio as rio\n", + "\n", + "# import h3\n", + "# import h3pandas\n", + "from h3ronpy import raster\n", + "from psycopg2.pool import ThreadedConnectionPool\n", "from rasterstats import zonal_stats\n", "\n", - "import requests\n" + "# import json" ] }, { @@ -70,7 +70,7 @@ "metadata": {}, "outputs": [], "source": [ - "#set env\n", + "# set env\n", "## env file for gcs upload\n", "env_path = \".env\"\n", "with open(env_path) as f:\n", @@ -79,20 +79,20 @@ " env_key, _val = line.split(\"=\", 1)\n", " env_value = _val.split(\"\\n\")[0]\n", " env[env_key] = env_value\n", - " \n", - "#list(env.keys())\n", + "\n", + "# list(env.keys())\n", "\n", "# set conexion to local ddbb\n", "postgres_thread_pool = ThreadedConnectionPool(\n", - " 1, \n", + " 1,\n", " 50,\n", - " host=env['API_POSTGRES_HOST'],\n", - " port=env['API_POSTGRES_PORT'],\n", - " user=env['API_POSTGRES_USERNAME'],\n", - " password=env['API_POSTGRES_PASSWORD']\n", + " host=env[\"API_POSTGRES_HOST\"],\n", + " port=env[\"API_POSTGRES_PORT\"],\n", + " user=env[\"API_POSTGRES_USERNAME\"],\n", + " password=env[\"API_POSTGRES_PASSWORD\"],\n", ")\n", "\n", - "#get list of sourcing records to iterate:\n", + "# get list of sourcing records to iterate:\n", "conn = postgres_thread_pool.getconn()\n", "cursor = conn.cursor()" ] @@ -116,7 +116,9 @@ "LANGUAGE SQL;\n", "\"\"\"\n", "\n", - "SQL_SUM_H3_GRID_OVER_GEO_REGION = SQL_GET_H3_UNCOMPACT_GEO_REGION+\"\"\"\n", + "SQL_SUM_H3_GRID_OVER_GEO_REGION = (\n", + " SQL_GET_H3_UNCOMPACT_GEO_REGION\n", + " + \"\"\"\n", "CREATE OR REPLACE FUNCTION sum_h3_grid_over_georegion(\n", " geo_region_id uuid, \n", " h3_resolution int,\n", @@ -141,8 +143,11 @@ "$$\n", "LANGUAGE plpgsql;\n", "\"\"\"\n", + ")\n", "\n", - "SQL_SUMPROD_H3_GRIDS_OVER_GEOREGION = SQL_GET_H3_UNCOMPACT_GEO_REGION+\"\"\"\n", + "SQL_SUMPROD_H3_GRIDS_OVER_GEOREGION = (\n", + " SQL_GET_H3_UNCOMPACT_GEO_REGION\n", + " + \"\"\"\n", "CREATE OR REPLACE FUNCTION sumprod_h3_grids_over_georegion(\n", " geo_region_id uuid,\n", " h3_resolution int,\n", @@ -170,6 +175,7 @@ "$$\n", "LANGUAGE plpgsql;\n", "\"\"\"\n", + ")\n", "\n", "SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL = \"\"\"\n", "CREATE OR REPLACE FUNCTION get_h3_table_column_for_material(material_id uuid, h3_data_type material_to_h3_type_enum)\n", @@ -185,7 +191,10 @@ "LANGUAGE SQL;\n", "\"\"\"\n", "\n", - "SQL_SUM_MATERIAL_OVER_GEO_REGION = SQL_SUM_H3_GRID_OVER_GEO_REGION+SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL+\"\"\"\n", + "SQL_SUM_MATERIAL_OVER_GEO_REGION = (\n", + " SQL_SUM_H3_GRID_OVER_GEO_REGION\n", + " + SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL\n", + " + \"\"\"\n", "CREATE OR REPLACE FUNCTION sum_material_over_georegion(\n", " geo_region_id uuid, \n", " material_id uuid,\n", @@ -212,6 +221,7 @@ "$$\n", "LANGUAGE plpgsql;\n", "\"\"\"\n", + ")\n", "\n", "SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION = \"\"\"\n", "CREATE OR REPLACE FUNCTION sum_weighted_deforestation_over_georegion(\n", @@ -379,6 +389,7 @@ "source": [ "# define functions raster to h3\n", "\n", + "\n", "def donwloadZipFile(data_url, output_folder, filename):\n", " \"\"\"\n", " Input\n", @@ -386,22 +397,21 @@ " data_url [string] : data url to download\n", 
" output_filder [string]: output folder to save the downloaded data\n", " filename: name of the saved file\"\"\"\n", - " \n", + "\n", " if not os.path.exists(output_folder):\n", " os.makedirs(output_folder)\n", - " print('Output folder created!')\n", + " print(\"Output folder created!\")\n", " else:\n", " pass\n", - " print('Output folder already exists!')\n", - " \n", - " urllib.request.urlretrieve(data_url, output_folder+f\"/{filename}\")\n", - " \n", - " with ZipFile(output_folder+f\"/{filename}\", 'r') as zipObj:\n", - " # Extract all the contents of zip file in different directory\n", - " zipObj.extractall(output_folder)\n", - " print('Data extracted!')\n", - " print('Done!')\n", - " \n" + " print(\"Output folder already exists!\")\n", + "\n", + " urllib.request.urlretrieve(data_url, output_folder + f\"/{filename}\")\n", + "\n", + " with ZipFile(output_folder + f\"/{filename}\", \"r\") as zipObj:\n", + " # Extract all the contents of zip file in different directory\n", + " zipObj.extractall(output_folder)\n", + " print(\"Data extracted!\")\n", + " print(\"Done!\")" ] }, { @@ -824,11 +834,11 @@ ], "source": [ "sourcing_records = pd.read_sql_query(\n", - " SQL_SUM_MATERIAL_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_CARBON_OVER_GEO_REGION \\\n", - " + SQL_SUM_WEIGHTED_WATER_OVER_GEO_REGION \\\n", + " SQL_SUM_MATERIAL_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_DEFORESTATION_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_CARBON_OVER_GEO_REGION\n", + " + SQL_SUM_WEIGHTED_WATER_OVER_GEO_REGION\n", " + \"\"\"\n", " SELECT\n", " sr.id,\n", @@ -860,26 +870,42 @@ " ) as sl\n", " on sr.\"sourcingLocationId\" = sl.id\n", " WHERE sl.\"materialId\"='0d7b1be5-dc86-47b8-ba3a-25190a275011'\n", - "\"\"\", conn)\n", + "\"\"\",\n", + " conn,\n", + ")\n", "\n", - "sourcing_records['land_per_ton'] = sourcing_records['harvested_area'] / sourcing_records['production']\n", + "sourcing_records[\"land_per_ton\"] = (\n", + " sourcing_records[\"harvested_area\"] / sourcing_records[\"production\"]\n", + ")\n", "\n", - "sourcing_records['deforestation_per_ha_landuse'] = sourcing_records['raw_deforestation'] / sourcing_records['harvested_area']\n", - "sourcing_records['bio_per_ha_landuse'] = sourcing_records['raw_biodiversity'] / sourcing_records['harvested_area']\n", - "sourcing_records['carbon_per_ha_landuse'] = sourcing_records['raw_carbon'] / sourcing_records['harvested_area']\n", - "sourcing_records['land_use'] = sourcing_records['land_per_ton'] * sourcing_records['tonnage']\n", + "sourcing_records[\"deforestation_per_ha_landuse\"] = (\n", + " sourcing_records[\"raw_deforestation\"] / sourcing_records[\"harvested_area\"]\n", + ")\n", + "sourcing_records[\"bio_per_ha_landuse\"] = (\n", + " sourcing_records[\"raw_biodiversity\"] / sourcing_records[\"harvested_area\"]\n", + ")\n", + "sourcing_records[\"carbon_per_ha_landuse\"] = (\n", + " sourcing_records[\"raw_carbon\"] / sourcing_records[\"harvested_area\"]\n", + ")\n", + "sourcing_records[\"land_use\"] = sourcing_records[\"land_per_ton\"] * sourcing_records[\"tonnage\"]\n", "\n", - "sourcing_records['deforestation'] = sourcing_records['deforestation_per_ha_landuse'] * sourcing_records['land_use']\n", - "sourcing_records['biodiversity_loss'] = sourcing_records['bio_per_ha_landuse'] * sourcing_records['land_use']\n", - "sourcing_records['carbon_loss'] = 
sourcing_records['carbon_per_ha_landuse'] * sourcing_records['land_use']\n", - "sourcing_records['water_impact'] = sourcing_records['raw_water'] * sourcing_records['tonnage']\n", + "sourcing_records[\"deforestation\"] = (\n", + " sourcing_records[\"deforestation_per_ha_landuse\"] * sourcing_records[\"land_use\"]\n", + ")\n", + "sourcing_records[\"biodiversity_loss\"] = (\n", + " sourcing_records[\"bio_per_ha_landuse\"] * sourcing_records[\"land_use\"]\n", + ")\n", + "sourcing_records[\"carbon_loss\"] = (\n", + " sourcing_records[\"carbon_per_ha_landuse\"] * sourcing_records[\"land_use\"]\n", + ")\n", + "sourcing_records[\"water_impact\"] = sourcing_records[\"raw_water\"] * sourcing_records[\"tonnage\"]\n", "\n", "# Farm impact scaler = production\n", "# Land use change impact scaler = harvested_area\n", "\n", - "#sourcing_records.to_csv('test_impact_calc.csv')\n", + "# sourcing_records.to_csv('test_impact_calc.csv')\n", "\n", - "sourcing_records\n" + "sourcing_records" ] }, { @@ -890,7 +916,9 @@ "outputs": [], "source": [ "# export to local csv\n", - "sourcing_records.to_csv('../../datasets/raw/qa_values/rubber_qa/all_indicators_ddbb_calculations.csv')" + "sourcing_records.to_csv(\n", + " \"../../datasets/raw/qa_values/rubber_qa/all_indicators_ddbb_calculations.csv\"\n", + ")" ] }, { @@ -987,43 +1015,54 @@ "source": [ "# deforestation\n", "\n", - "#obtains impact calculation for deforestation from the ddbb\n", - "ir_deforestation = pd.read_sql_query(\"\"\"SELECT sr.id, sr.tonnage, ir.value def FROM sourcing_records sr \n", + "# obtains impact calculation for deforestation from the ddbb\n", + "ir_deforestation = pd.read_sql_query(\n", + " \"\"\"SELECT sr.id, sr.tonnage, ir.value def FROM sourcing_records sr \n", " INNER JOIN sourcing_location sl on sl.id=sr.\"sourcingLocationId\" \n", " INNER JOIN material m on m.id=sl.\"materialId\" \n", " INNER JOIN geo_region gr on gr.id =sl.\"geoRegionId\" \n", " INNER JOIN indicator_record ir on ir.\"sourcingRecordId\"= sr.id\n", - " WHERE m.\"name\" = 'Rubber and articles thereof' and ir.\"indicatorId\"='633cf928-7c4f-41a3-99c5-e8c1bda0b323'\"\"\", conn)\n", + " WHERE m.\"name\" = 'Rubber and articles thereof' and ir.\"indicatorId\"='633cf928-7c4f-41a3-99c5-e8c1bda0b323'\"\"\",\n", + " conn,\n", + ")\n", "\n", "# biodiversity\n", "\n", - "ir_biodiversity = pd.read_sql_query(\"\"\"SELECT sr.id, sr.tonnage, ir.value bio FROM sourcing_records sr \n", + "ir_biodiversity = pd.read_sql_query(\n", + " \"\"\"SELECT sr.id, sr.tonnage, ir.value bio FROM sourcing_records sr \n", " INNER JOIN sourcing_location sl on sl.id=sr.\"sourcingLocationId\" \n", " INNER JOIN material m on m.id=sl.\"materialId\" \n", " INNER JOIN geo_region gr on gr.id =sl.\"geoRegionId\" \n", " INNER JOIN indicator_record ir on ir.\"sourcingRecordId\"= sr.id\n", - " WHERE m.\"name\" = 'Rubber and articles thereof' and ir.\"indicatorId\"='0594aba7-70a5-460c-9b58-fc1802d264ea'\"\"\", conn)\n", + " WHERE m.\"name\" = 'Rubber and articles thereof' and ir.\"indicatorId\"='0594aba7-70a5-460c-9b58-fc1802d264ea'\"\"\",\n", + " conn,\n", + ")\n", "\n", "\n", "# carbon\n", "\n", - "ir_carbon = pd.read_sql_query(\"\"\"SELECT sr.id, sr.tonnage, ir.value carbon FROM sourcing_records sr \n", + "ir_carbon = pd.read_sql_query(\n", + " \"\"\"SELECT sr.id, sr.tonnage, ir.value carbon FROM sourcing_records sr \n", " INNER JOIN sourcing_location sl on sl.id=sr.\"sourcingLocationId\" \n", " INNER JOIN material m on m.id=sl.\"materialId\" \n", " INNER JOIN geo_region gr on gr.id =sl.\"geoRegionId\" \n", " INNER 
JOIN indicator_record ir on ir.\"sourcingRecordId\"= sr.id\n", - " WHERE m.\"name\" = 'Rubber and articles thereof' and ir.\"indicatorId\"='c71eb531-2c8e-40d2-ae49-1049543be4d1'\"\"\", conn)\n", - "\n", + " WHERE m.\"name\" = 'Rubber and articles thereof' and ir.\"indicatorId\"='c71eb531-2c8e-40d2-ae49-1049543be4d1'\"\"\",\n", + " conn,\n", + ")\n", "\n", "\n", "# water\n", "\n", - "ir_water = pd.read_sql_query(\"\"\"SELECT sr.id, sr.tonnage, ir.value water FROM sourcing_records sr \n", + "ir_water = pd.read_sql_query(\n", + " \"\"\"SELECT sr.id, sr.tonnage, ir.value water FROM sourcing_records sr \n", " INNER JOIN sourcing_location sl on sl.id=sr.\"sourcingLocationId\" \n", " INNER JOIN material m on m.id=sl.\"materialId\" \n", " INNER JOIN geo_region gr on gr.id =sl.\"geoRegionId\" \n", " INNER JOIN indicator_record ir on ir.\"sourcingRecordId\"= sr.id\n", - " WHERE m.\"name\" = 'Rubber and articles thereof' and ir.\"indicatorId\"='e2c00251-fe31-4330-8c38-604535d795dc'\"\"\", conn)\n", + " WHERE m.\"name\" = 'Rubber and articles thereof' and ir.\"indicatorId\"='e2c00251-fe31-4330-8c38-604535d795dc'\"\"\",\n", + " conn,\n", + ")\n", "\n", "\n", "ir_water.head()" @@ -1155,17 +1194,11 @@ ], "source": [ "##merge all dataframes\n", - "merged_all = ir_water.merge(ir_carbon,\n", - " how='inner',\n", - " on='id').merge(\n", - " ir_biodiversity,\n", - " how='inner',\n", - " on='id'\n", - " ).merge(\n", - " ir_deforestation,\n", - " how='inner',\n", - " on='id'\n", - " )\n", + "merged_all = (\n", + " ir_water.merge(ir_carbon, how=\"inner\", on=\"id\")\n", + " .merge(ir_biodiversity, how=\"inner\", on=\"id\")\n", + " .merge(ir_deforestation, how=\"inner\", on=\"id\")\n", + ")\n", "merged_all.head()" ] }, @@ -1176,8 +1209,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export to csv\n", - "merged_all.to_csv('../../datasets/raw/qa_values/rubber_qa/all_indicators_ddbb_indicator_record.csv')" + "# export to csv\n", + "merged_all.to_csv(\"../../datasets/raw/qa_values/rubber_qa/all_indicators_ddbb_indicator_record.csv\")" ] }, { @@ -1276,12 +1309,16 @@ "source": [ "# obtain rubber dataset that will be used to obtain the zonal statistics\n", "\n", - "df_rubber = gpd.GeoDataFrame.from_postgis(\"\"\"SELECT sr.id, gr.\"theGeom\" FROM sourcing_records sr \n", + "df_rubber = gpd.GeoDataFrame.from_postgis(\n", + " \"\"\"SELECT sr.id, gr.\"theGeom\" FROM sourcing_records sr \n", " INNER JOIN sourcing_location sl on sl.id=sr.\"sourcingLocationId\" \n", " INNER JOIN material m on m.id=sl.\"materialId\" \n", " INNER JOIN geo_region gr on gr.id =sl.\"geoRegionId\" \n", " INNER JOIN indicator_record ir on ir.\"sourcingRecordId\"= sr.id\n", - " WHERE m.\"name\" = 'Rubber and articles thereof' and ir.\"indicatorId\"='633cf928-7c4f-41a3-99c5-e8c1bda0b323'\"\"\", conn, geom_col='theGeom')\n", + " WHERE m.\"name\" = 'Rubber and articles thereof' and ir.\"indicatorId\"='633cf928-7c4f-41a3-99c5-e8c1bda0b323'\"\"\",\n", + " conn,\n", + " geom_col=\"theGeom\",\n", + ")\n", "df_rubber.head()" ] }, @@ -1302,9 +1339,9 @@ "outputs": [], "source": [ "path = \"../../datasets/raw\"\n", - "output_folder = path + '/qa_values'\n", - "#output_file = output_folder+\"/rubber_HarvAreaYield_Geotiff.zip\n", - "data_url = \"https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/rubber_HarvAreaYield_Geotiff.zip\"\n" + "output_folder = path + \"/qa_values\"\n", + "# output_file = output_folder+\"/rubber_HarvAreaYield_Geotiff.zip\n", + "data_url = 
\"https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/rubber_HarvAreaYield_Geotiff.zip\"" ] }, { @@ -1324,7 +1361,7 @@ } ], "source": [ - "donwloadZipFile(data_url, output_folder, filename='rubber_HarvAreaYield_Geotiff.zip')" + "donwloadZipFile(data_url, output_folder, filename=\"rubber_HarvAreaYield_Geotiff.zip\")" ] }, { @@ -1350,7 +1387,11 @@ } ], "source": [ - "[file for file in os.listdir(output_folder+ '/rubber_HarvAreaYield_Geotiff') if file.endswith('.tif')]" + "[\n", + " file\n", + " for file in os.listdir(output_folder + \"/rubber_HarvAreaYield_Geotiff\")\n", + " if file.endswith(\".tif\")\n", + "]" ] }, { @@ -1371,9 +1412,11 @@ ], "source": [ "# download geometry so we canclip the raster data to a particular location\n", - "donwloadZipFile(data_url='https://data.biogeo.ucdavis.edu/data/gadm3.6/shp/gadm36_IDN_shp.zip',\n", - " output_folder=path + '/qa_values',\n", - " filename='gadm36_IDN_shp.zip')\n" + "donwloadZipFile(\n", + " data_url=\"https://data.biogeo.ucdavis.edu/data/gadm3.6/shp/gadm36_IDN_shp.zip\",\n", + " output_folder=path + \"/qa_values\",\n", + " filename=\"gadm36_IDN_shp.zip\",\n", + ")" ] }, { @@ -1392,7 +1435,7 @@ } ], "source": [ - "#clip rubber dato to indonesia so we can work with a cliped raster\n", + "# clip rubber dato to indonesia so we can work with a cliped raster\n", "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -of GTiff -cutline ../../datasets/raw/qa_values/gadm36_IDN_1.shp -cl gadm36_IDN_1 -crop_to_cutline ../../datasets/raw/qa_values/rubber_HarvAreaYield_Geotiff/rubber_Production.tif ../../datasets/raw/qa_values/rubber_production_clip.tif" ] }, @@ -1418,12 +1461,18 @@ "source": [ "# We do the check using rubber production as its one of the datases that we use\n", "resolution = 6\n", - "raster_path = output_folder+ '/rubber_production_clip.tif'\n", + "raster_path = output_folder + \"/rubber_production_clip.tif\"\n", "with rio.open(raster_path) as src:\n", - " gdf = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=resolution, nodata_value=src.profile['nodata'], compacted=False)\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1),\n", + " src.transform,\n", + " h3_resolution=resolution,\n", + " nodata_value=src.profile[\"nodata\"],\n", + " compacted=False,\n", + " )\n", "\n", - " gdf.plot('value')\n", - " gdf['h3index'] = gdf['h3index'].apply(hex)" + " gdf.plot(\"value\")\n", + " gdf[\"h3index\"] = gdf[\"h3index\"].apply(hex)" ] }, { @@ -1659,9 +1708,9 @@ } ], "source": [ - "#remove the first part to be able of merging with the additional dataset from the db\n", - "updated_index = [index.split('x')[1] for index in list(gdf['h3index'])]\n", - "gdf['h3index']=updated_index\n", + "# remove the first part to be able of merging with the additional dataset from the db\n", + "updated_index = [index.split(\"x\")[1] for index in list(gdf[\"h3index\"])]\n", + "gdf[\"h3index\"] = updated_index\n", "gdf.head()" ] }, @@ -1672,8 +1721,8 @@ "metadata": {}, "outputs": [], "source": [ - "#save to file so we can explore in qgis\n", - "gdf.to_file(output_folder+ '/h3_rubber_production_clip.shp')" + "# save to file so we can explore in qgis\n", + "gdf.to_file(output_folder + \"/h3_rubber_production_clip.shp\")" ] }, { @@ -1753,7 +1802,10 @@ ], "source": [ "# retrieve what we have in the ddbb to compare results\n", - "ddbb_database = pd.read_sql_query(\"\"\"select prod.h3index, prod.\"earthstat2000GlobalRubberProduction\" from h3_grid_earthstat2000_global_prod prod\"\"\" , conn)\n", + 
"ddbb_database = pd.read_sql_query(\n", + " \"\"\"select prod.h3index, prod.\"earthstat2000GlobalRubberProduction\" from h3_grid_earthstat2000_global_prod prod\"\"\",\n", + " conn,\n", + ")\n", "ddbb_database.head()" ] }, @@ -1859,7 +1911,7 @@ } ], "source": [ - "merged_gdf = gdf.merge(ddbb_database, how='inner', on='h3index')\n", + "merged_gdf = gdf.merge(ddbb_database, how=\"inner\", on=\"h3index\")\n", "merged_gdf.head()" ] }, @@ -2039,8 +2091,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export to csv so we can compare in the excel spreadhseet\n", - "merged_gdf.to_csv(output_folder+ '/h3_rubber_production_clip.csv')" + "# export to csv so we can compare in the excel spreadhseet\n", + "merged_gdf.to_csv(output_folder + \"/h3_rubber_production_clip.csv\")" ] }, { @@ -2191,19 +2243,15 @@ } ], "source": [ - "raster_path = '../../datasets/raw/qa_values/rubber_HarvAreaYield_Geotiff/rubber_Production.tif'\n", + "raster_path = \"../../datasets/raw/qa_values/rubber_HarvAreaYield_Geotiff/rubber_Production.tif\"\n", "raster_stats = []\n", "for i, row in df_rubber.iterrows():\n", - " geom = row['theGeom']\n", - " stat_ = zonal_stats(geom,\n", - " raster_path,\n", - " stats='sum',\n", - " all_touched = True\n", - " )\n", - " raster_stats.append(stat_[0]['sum'])\n", - " \n", - "df_rubber['raster_stats_prod']=raster_stats\n", - "df_rubber.head()\n" + " geom = row[\"theGeom\"]\n", + " stat_ = zonal_stats(geom, raster_path, stats=\"sum\", all_touched=True)\n", + " raster_stats.append(stat_[0][\"sum\"])\n", + "\n", + "df_rubber[\"raster_stats_prod\"] = raster_stats\n", + "df_rubber.head()" ] }, { @@ -2213,7 +2261,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_rubber.to_csv('../../datasets/raw/qa_values/rubber_qa/production_db_raster_v2.csv')" + "df_rubber.to_csv(\"../../datasets/raw/qa_values/rubber_qa/production_db_raster_v2.csv\")" ] }, { @@ -2365,19 +2413,17 @@ } ], "source": [ - "raster_path = '../../datasets/raw/qa_values/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaHectares.tif'\n", + "raster_path = (\n", + " \"../../datasets/raw/qa_values/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaHectares.tif\"\n", + ")\n", "raster_stats = []\n", "for i, row in df_rubber.iterrows():\n", - " geom = row['theGeom']\n", - " stat_ = zonal_stats(geom,\n", - " raster_path,\n", - " stats='sum',\n", - " all_touched = True\n", - " )\n", - " raster_stats.append(stat_[0]['sum'])\n", - " \n", - "sourcing_records_harvest['raster_stats_prod']=raster_stats\n", - "sourcing_records_harvest.head()\n" + " geom = row[\"theGeom\"]\n", + " stat_ = zonal_stats(geom, raster_path, stats=\"sum\", all_touched=True)\n", + " raster_stats.append(stat_[0][\"sum\"])\n", + "\n", + "sourcing_records_harvest[\"raster_stats_prod\"] = raster_stats\n", + "sourcing_records_harvest.head()" ] }, { @@ -2388,7 +2434,9 @@ "outputs": [], "source": [ "# export to csv so we can compare in excel spreadhseet\n", - "sourcing_records_harvest.to_csv('../../datasets/raw/qa_values/rubber_qa/harvest_db_raster_distinct_v1.csv')" + "sourcing_records_harvest.to_csv(\n", + " \"../../datasets/raw/qa_values/rubber_qa/harvest_db_raster_distinct_v1.csv\"\n", + ")" ] }, { @@ -2549,20 +2597,20 @@ } ], "source": [ - "#REPLICATE raster preprocesing for deforestation ingestion\n", + "# REPLICATE raster preprocesing for deforestation ingestion\n", "\n", - "tiles_files = '../../datasets/raw/downloaded_tiles'\n", - "tiles_outputs_count = '../../datasets/processed/processed_files/count'\n", - "tiles_outputs_density = 
'../../datasets/processed/processed_files/density'\n", + "tiles_files = \"../../datasets/raw/downloaded_tiles\"\n", + "tiles_outputs_count = \"../../datasets/processed/processed_files/count\"\n", + "tiles_outputs_density = \"../../datasets/processed/processed_files/density\"\n", "\n", - "#upsample rasters before merging\n", + "# upsample rasters before merging\n", "for file in os.listdir(tiles_files):\n", - " raster_path = tiles_files+'/'+file\n", - " outputh_file_count = tiles_outputs_count+'/'+file\n", - " outputh_file_density = tiles_outputs_density+'/'+file\n", - " \n", - " #!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -r max -tr 0.0833333333333286 0.0833333333333286 -multi -of GTiff -wo NUM_THREADS=ALL_CPUS $raster_path $outputh_file; \n", - " !gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -r sum -tr 0.0833333333333286 0.0833333333333286 -multi -of GTiff -wo NUM_THREADS=ALL_CPUS $raster_path $outputh_file_count; \n", + " raster_path = tiles_files + \"/\" + file\n", + " outputh_file_count = tiles_outputs_count + \"/\" + file\n", + " outputh_file_density = tiles_outputs_density + \"/\" + file\n", + "\n", + " #!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -r max -tr 0.0833333333333286 0.0833333333333286 -multi -of GTiff -wo NUM_THREADS=ALL_CPUS $raster_path $outputh_file;\n", + " !gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -r sum -tr 0.0833333333333286 0.0833333333333286 -multi -of GTiff -wo NUM_THREADS=ALL_CPUS $raster_path $outputh_file_count;\n", " !gdal_calc.py --calc \"A/111111.1111111111\" --format GTiff --type Float32 --NoDataValue 0.0 -A $outputh_file_count --A_band 1 --outfile $tiles_outputs_density" ] }, @@ -2581,7 +2629,7 @@ } ], "source": [ - "#generate virtual tile\n", + "# generate virtual tile\n", "!gdalbuildvrt ../../datasets/processed/processed_files/hansen_loss_2020_ha_count.vrt ../../datasets/processed/processed_files/count/*.tif" ] }, @@ -2601,7 +2649,7 @@ } ], "source": [ - "#translate\n", + "# translate\n", "!gdal_translate -of GTiff -co NUM_THREADS=ALL_CPUS -co BIGTIFF=YES -co COMPRESS=DEFLATE -co PREDICTOR=2 -co ZLEVEL=9 -co BLOCKXSIZE=512 -co BLOCKYSIZE=512 ../../datasets/processed/processed_files/hansen_loss_2020_ha.vrt ../../datasets/processed/processed_files/hansen_loss_2020_ha.tif" ] }, @@ -2643,10 +2691,12 @@ } ], "source": [ - "#calculate the raw deforestation using this raster\n", - "#harvest area raster\n", - "harvest_area_rubber = '../../datasets/raw/qa_values/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaHectares.tif'\n", - "deforestation = '../../datasets/processed/processed_files/hansen_loss_2019_2020_ha.tif'\n", + "# calculate the raw deforestation using this raster\n", + "# harvest area raster\n", + "harvest_area_rubber = (\n", + " \"../../datasets/raw/qa_values/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaHectares.tif\"\n", + ")\n", + "deforestation = \"../../datasets/processed/processed_files/hansen_loss_2019_2020_ha.tif\"\n", "!gdal_translate -projwin -180.0 80.0 150.0 -20.0 -of GTiff $harvest_area_rubber ../../datasets/raw/qa_values/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaHectares_clip_extend.tif\n", "!gdal_calc.py --calc \"(A*B)\" --format GTiff --type Byte -A ../../datasets/raw/qa_values/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaHectares_clip_extend.tif --A_band 1 -B $deforestation --outfile ../../datasets/processed/processed_files/hansen_loss_harvest_area_rubber.tif" ] @@ -2783,20 +2833,18 @@ } ], "source": [ - "#zonal statistics for those areas\n", - "raster_raw_deforestation = 
'../../datasets/processed/processed_files/hansen_loss_harvest_area_rubber.tif'\n", + "# zonal statistics for those areas\n", + "raster_raw_deforestation = (\n", + " \"../../datasets/processed/processed_files/hansen_loss_harvest_area_rubber.tif\"\n", + ")\n", "\n", "raster_stats = []\n", "for i, row in df_rubber.iterrows():\n", - " geom = row['theGeom']\n", - " stat_ = zonal_stats(geom,\n", - " raster_raw_deforestation,\n", - " stats='sum',\n", - " all_touched = True\n", - " )\n", - " raster_stats.append(stat_[0]['sum'])\n", - " \n", - "df_rubber['rs_raw_def']=raster_stats\n", + " geom = row[\"theGeom\"]\n", + " stat_ = zonal_stats(geom, raster_raw_deforestation, stats=\"sum\", all_touched=True)\n", + " raster_stats.append(stat_[0][\"sum\"])\n", + "\n", + "df_rubber[\"rs_raw_def\"] = raster_stats\n", "df_rubber.head()" ] }, @@ -2807,8 +2855,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export to csv\n", - "df_rubber.to_csv('../../datasets/raw/qa_values/rubber_qa/deforestation_raster_calc_raw_def_v2.csv')" + "# export to csv\n", + "df_rubber.to_csv(\"../../datasets/raw/qa_values/rubber_qa/deforestation_raster_calc_raw_def_v2.csv\")" ] }, { @@ -8977,92 +9025,90 @@ } ], "source": [ - "output_folder = '../../datasets/raw/downloaded_tiles'\n", - "tiles_outputs_def20 = '../../datasets/processed/processed_files/def20'\n", - "tiles_outputs_count = '../../datasets/processed/processed_files/count'\n", - "tiles_outputs_density = '../../datasets/processed/processed_files/density'\n", + "output_folder = \"../../datasets/raw/downloaded_tiles\"\n", + "tiles_outputs_def20 = \"../../datasets/processed/processed_files/def20\"\n", + "tiles_outputs_count = \"../../datasets/processed/processed_files/count\"\n", + "tiles_outputs_density = \"../../datasets/processed/processed_files/density\"\n", "\n", "tiles_to_download = [\n", - " '10N_010W',\n", - " '10N_020W',\n", - " '40N_080W',\n", - " '40N_090W',\n", - " '40N_100W',\n", - " '40N_130W',\n", - " '20N_160W',\n", - " '30N_160W',\n", - " '50N_070W',\n", - " '50N_080W',\n", - " '50N_090W',\n", - " '50N_100W',\n", - " '50N_110W',\n", - " '50N_120W',\n", - " '50N_130W',\n", - " '40N_030E',\n", - " '70N_160W',\n", - " '70N_170W',\n", - " '60N_170W',\n", - " '60N_160W',\n", - " '80N_160W',\n", - " '80N_170W',\n", - " '30N_070E',\n", - " '20N_070E',\n", - " '10N_070E',\n", - " '30N_080E',\n", - " '20N_080E',\n", - " '10N_080E',\n", - " '40N_070E',\n", - " '30N_090E',\n", - " '30N_100E',\n", - " '20N_100E',\n", - " '10N_090E',\n", - " '00N_090E',\n", - " '00N_110E',\n", - " '10N_110E',\n", - " '10N_120E',\n", - " '00N_130E',\n", - " '00N_140E',\n", - " '40N_130E',\n", - " '40N_140E',\n", - " '80N_160W',\n", - " '70N_180W',\n", - " '80N_150W',\n", - " '60N_150W',\n", - " '60N_140W',\n", - " '70N_140W',\n", - " '30N_100W',\n", - " '30N_090W',\n", - " '30N_110W',\n", - " '30N_060E',\n", - " '40N_080E',\n", - " '10S_120E',\n", - " '40N_120E',\n", - " '50N_130E' \n", + " \"10N_010W\",\n", + " \"10N_020W\",\n", + " \"40N_080W\",\n", + " \"40N_090W\",\n", + " \"40N_100W\",\n", + " \"40N_130W\",\n", + " \"20N_160W\",\n", + " \"30N_160W\",\n", + " \"50N_070W\",\n", + " \"50N_080W\",\n", + " \"50N_090W\",\n", + " \"50N_100W\",\n", + " \"50N_110W\",\n", + " \"50N_120W\",\n", + " \"50N_130W\",\n", + " \"40N_030E\",\n", + " \"70N_160W\",\n", + " \"70N_170W\",\n", + " \"60N_170W\",\n", + " \"60N_160W\",\n", + " \"80N_160W\",\n", + " \"80N_170W\",\n", + " \"30N_070E\",\n", + " \"20N_070E\",\n", + " \"10N_070E\",\n", + " \"30N_080E\",\n", + " \"20N_080E\",\n", + " 
\"10N_080E\",\n", + " \"40N_070E\",\n", + " \"30N_090E\",\n", + " \"30N_100E\",\n", + " \"20N_100E\",\n", + " \"10N_090E\",\n", + " \"00N_090E\",\n", + " \"00N_110E\",\n", + " \"10N_110E\",\n", + " \"10N_120E\",\n", + " \"00N_130E\",\n", + " \"00N_140E\",\n", + " \"40N_130E\",\n", + " \"40N_140E\",\n", + " \"80N_160W\",\n", + " \"70N_180W\",\n", + " \"80N_150W\",\n", + " \"60N_150W\",\n", + " \"60N_140W\",\n", + " \"70N_140W\",\n", + " \"30N_100W\",\n", + " \"30N_090W\",\n", + " \"30N_110W\",\n", + " \"30N_060E\",\n", + " \"40N_080E\",\n", + " \"10S_120E\",\n", + " \"40N_120E\",\n", + " \"50N_130E\",\n", "]\n", "\n", "for tile in tiles_to_download:\n", - " \n", - " url = f'https://storage.googleapis.com/earthenginepartners-hansen/GFC-2020-v1.8/Hansen_GFC-2020-v1.8_lossyear_{tile}.tif'\n", - " print(f'Requesting {url}')\n", - " \n", - " urllib.request.urlretrieve(url, output_folder+f\"/Hansen_GFC-2020-v1.8_lossyear_{tile}.tif\")\n", - " print(f'Tile {tile} suscessfully downloaded!')\n", - " \n", - " raster_path = output_folder+f\"/Hansen_GFC-2020-v1.8_lossyear_{tile}.tif\"\n", - " outputh_file_def20_v2 = tiles_outputs_def20+f\"Hansen_GFC-2020-v1.8_lossyear_{tile}_v2.tif\"\n", - " outputh_file_count_v2 = tiles_outputs_count+f\"Hansen_GFC-2020-v1.8_lossyear_{tile}_v2.tif\"\n", - " outputh_file_max_v2 = tiles_outputs_count+f\"Hansen_GFC-2020-v1.8_lossyear_{tile}_v2_c.tif\"\n", - " outputh_file_density_v2 = tiles_outputs_density+f\"Hansen_GFC-2020-v1.8_lossyear_{tile}_v2.tif\"\n", - " \n", - " #!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -r max -tr 0.0833333333333286 0.0833333333333286 -multi -of GTiff -wo NUM_THREADS=ALL_CPUS $raster_path $outputh_file; \n", + " url = f\"https://storage.googleapis.com/earthenginepartners-hansen/GFC-2020-v1.8/Hansen_GFC-2020-v1.8_lossyear_{tile}.tif\"\n", + " print(f\"Requesting {url}\")\n", + "\n", + " urllib.request.urlretrieve(url, output_folder + f\"/Hansen_GFC-2020-v1.8_lossyear_{tile}.tif\")\n", + " print(f\"Tile {tile} suscessfully downloaded!\")\n", + "\n", + " raster_path = output_folder + f\"/Hansen_GFC-2020-v1.8_lossyear_{tile}.tif\"\n", + " outputh_file_def20_v2 = tiles_outputs_def20 + f\"Hansen_GFC-2020-v1.8_lossyear_{tile}_v2.tif\"\n", + " outputh_file_count_v2 = tiles_outputs_count + f\"Hansen_GFC-2020-v1.8_lossyear_{tile}_v2.tif\"\n", + " outputh_file_max_v2 = tiles_outputs_count + f\"Hansen_GFC-2020-v1.8_lossyear_{tile}_v2_c.tif\"\n", + " outputh_file_density_v2 = tiles_outputs_density + f\"Hansen_GFC-2020-v1.8_lossyear_{tile}_v2.tif\"\n", + "\n", + " #!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -r max -tr 0.0833333333333286 0.0833333333333286 -multi -of GTiff -wo NUM_THREADS=ALL_CPUS $raster_path $outputh_file;\n", " !gdal_calc.py --calc \"(A>18)\" --format GTiff --type Byte -A $raster_path --A_band 1 --outfile $outputh_file_def20_v2;\n", " !rm -f $raster_path\n", " !gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -r max -tr 0.0833333333333286 0.0833333333333286 --NoDataValue 0.0 -multi -of GTiff -wo NUM_THREADS=ALL_CPUS $outputh_file_def20_v2 $outputh_file_max_v2;\n", " #!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -r sum -tr 0.0833333333333286 0.0833333333333286 -multi -of GTiff -wo NUM_THREADS=ALL_CPUS $outputh_file_def20_v2 $outputh_file_count_v2;\n", " !rm -f $outputh_file_def20_v2\n", " #!gdal_calc.py --calc \"A/111111.1111111111\" --format GTiff --type Float32 --NoDataValue 0.0 -A $outputh_file_count_v2 --A_band 1 --outfile $outputh_file_density_v2;\n", - " #!rm -f $outputh_file_max_v2;\n", - " " + " #!rm -f $outputh_file_max_v2;" ] }, { @@ 
-9080,7 +9126,7 @@ } ], "source": [ - "#generate virtual tile\n", + "# generate virtual tile\n", "!gdalbuildvrt ../../datasets/processed/processed_files/PROCESSED/hansen_loss_2020_ha_density_v2_c.vrt ../../datasets/processed/processed_files/count/*.tif" ] }, @@ -9100,7 +9146,7 @@ } ], "source": [ - "#translate\n", + "# translate\n", "!gdal_translate -of GTiff -co NUM_THREADS=ALL_CPUS -co BIGTIFF=YES -co COMPRESS=DEFLATE -co PREDICTOR=2 -co ZLEVEL=9 -co BLOCKXSIZE=512 -co BLOCKYSIZE=512 ../../datasets/processed/processed_files/PROCESSED/hansen_loss_2020_ha_density_v2_c.vrt ../../datasets/processed/processed_files/PROCESSED/hansen_loss_2020_ha_density_v2_c.tif" ] }, @@ -9125,13 +9171,21 @@ } ], "source": [ - "#Translate raster to h3\n", + "# Translate raster to h3\n", "resolution = 6\n", - "raster_path = '../../datasets/processed/processed_files/PROCESSED/hansen_loss_2020_ha_density_v2_c.tif'\n", + "raster_path = (\n", + " \"../../datasets/processed/processed_files/PROCESSED/hansen_loss_2020_ha_density_v2_c.tif\"\n", + ")\n", "with rio.open(raster_path) as src:\n", - " gdf = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=resolution, nodata_value=src.profile['nodata'], compacted=False)\n", - " #gdf.plot('value')\n", - " gdf['h3index'] = gdf['h3index'].apply(hex)" + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1),\n", + " src.transform,\n", + " h3_resolution=resolution,\n", + " nodata_value=src.profile[\"nodata\"],\n", + " compacted=False,\n", + " )\n", + " # gdf.plot('value')\n", + " gdf[\"h3index\"] = gdf[\"h3index\"].apply(hex)" ] }, { @@ -9223,8 +9277,8 @@ } ], "source": [ - "h3index = [el.split('x')[1] for el in list(gdf['h3index'])]\n", - "gdf['h3index']= h3index\n", + "h3index = [el.split(\"x\")[1] for el in list(gdf[\"h3index\"])]\n", + "gdf[\"h3index\"] = h3index\n", "gdf.head()" ] }, @@ -9235,8 +9289,10 @@ "metadata": {}, "outputs": [], "source": [ - "#export to shp\n", - "gdf.to_file('../../datasets/processed/processed_files/PROCESSED/hansen_loss_2020_ha_density_h3_v2.shp')" + "# export to shp\n", + "gdf.to_file(\n", + " \"../../datasets/processed/processed_files/PROCESSED/hansen_loss_2020_ha_density_h3_v2.shp\"\n", + ")" ] }, { @@ -9328,7 +9384,9 @@ } ], "source": [ - "gdf = gpd.read_file('../../datasets/processed/processed_files/PROCESSED/hansen_loss_2020_ha_density_h3.shp')\n", + "gdf = gpd.read_file(\n", + " \"../../datasets/processed/processed_files/PROCESSED/hansen_loss_2020_ha_density_h3.shp\"\n", + ")\n", "gdf.head()" ] }, @@ -9339,8 +9397,8 @@ "metadata": {}, "outputs": [], "source": [ - "list_ = list(gdf['h3index'])\n", - "list_replace = str(list_).replace('[','(').replace(']',')')" + "list_ = list(gdf[\"h3index\"])\n", + "list_replace = str(list_).replace(\"[\", \"(\").replace(\"]\", \")\")" ] }, { @@ -9419,10 +9477,13 @@ } ], "source": [ - "#get material\n", + "# get material\n", "\n", - "rubber_df = pd.read_sql_query(f\"\"\"select hatable.h3index h3index, hatable.\"earthstat2000GlobalRubberHarvestedareahectares\" v from h3_grid_earthstat2000_global_ha hatable \n", - "where hatable.\"earthstat2000GlobalRubberHarvestedareahectares\">0 and hatable.h3index in {list_replace}\"\"\", conn)\n", + "rubber_df = pd.read_sql_query(\n", + " f\"\"\"select hatable.h3index h3index, hatable.\"earthstat2000GlobalRubberHarvestedareahectares\" v from h3_grid_earthstat2000_global_ha hatable \n", + "where hatable.\"earthstat2000GlobalRubberHarvestedareahectares\">0 and hatable.h3index in {list_replace}\"\"\",\n", + " conn,\n", + ")\n", "rubber_df.head()" ] 
}, @@ -9521,10 +9582,8 @@ } ], "source": [ - "#join gdf with harvest area\n", - "density_ha_rubber = gdf.merge(rubber_df,\n", - " how='inner',\n", - " on='h3index')\n", + "# join gdf with harvest area\n", + "density_ha_rubber = gdf.merge(rubber_df, how=\"inner\", on=\"h3index\")\n", "density_ha_rubber.head()" ] }, @@ -9598,13 +9657,16 @@ } ], "source": [ - "#get georegions\n", + "# get georegions\n", "\n", - "gr_ids = pd.read_sql_query(\"\"\"select sr.id from sourcing_location sl \n", + "gr_ids = pd.read_sql_query(\n", + " \"\"\"select sr.id from sourcing_location sl \n", "inner join sourcing_records sr ON sr.\"sourcingLocationId\" = sl.id \n", "inner join geo_region gr on gr.id = sl.\"geoRegionId\" \n", "where sl.\"materialId\" = '0d7b1be5-dc86-47b8-ba3a-25190a275011'\n", - "\"\"\", conn)\n", + "\"\"\",\n", + " conn,\n", + ")\n", "gr_ids.head()" ] }, @@ -9616,18 +9678,21 @@ "outputs": [], "source": [ "sum_density = []\n", - "for sr_id in list(gr_ids['id']):\n", - " #print(sr_id)\n", - " _gr = pd.read_sql_query(f\"\"\"select sr.id, h3_uncompact(gr.\"h3Compact\"::h3index[], 6) from sourcing_location sl \n", + "for sr_id in list(gr_ids[\"id\"]):\n", + " # print(sr_id)\n", + " _gr = pd.read_sql_query(\n", + " f\"\"\"select sr.id, h3_uncompact(gr.\"h3Compact\"::h3index[], 6) from sourcing_location sl \n", " inner join sourcing_records sr ON sr.\"sourcingLocationId\" = sl.id \n", " inner join geo_region gr on gr.id = sl.\"geoRegionId\" \n", " where sl.\"materialId\" = '0d7b1be5-dc86-47b8-ba3a-25190a275011' and sr.id = '{sr_id}'\n", - " \"\"\", conn)\n", - " #print(_gr)\n", - " \n", - " density_filter = density_ha_rubber[density_ha_rubber['h3index'].isin(list(_gr['h3_uncompact']))]\n", - " sum_ = sum(density_filter['value']*3612.9) \n", - " #print(round(sum_))\n", + " \"\"\",\n", + " conn,\n", + " )\n", + " # print(_gr)\n", + "\n", + " density_filter = density_ha_rubber[density_ha_rubber[\"h3index\"].isin(list(_gr[\"h3_uncompact\"]))]\n", + " sum_ = sum(density_filter[\"value\"] * 3612.9)\n", + " # print(round(sum_))\n", " sum_density.append(round(sum_))" ] }, @@ -9678,8 +9743,7 @@ "#rasterise biodiversity layer\n", "!gdal_rasterize -l wwf_terr_ecos -a Permanent_ -tr 0.0833333333333286 0.0833333333333286 -a_nodata 0.0 \\\n", "-te -180.0 -90.0 180.0 90.0 -ot Float32 \\\n", - "-of GTiff ../../datasets/raw/qa_values/rubber_qa/output/wwf_terr_ecos.shp ../../datasets/raw/qa_values/rubber_qa/output/lcia_psl_r_permanent_crops.tif\n", - "\n" + "-of GTiff ../../datasets/raw/qa_values/rubber_qa/output/wwf_terr_ecos.shp ../../datasets/raw/qa_values/rubber_qa/output/lcia_psl_r_permanent_crops.tif" ] }, { @@ -9704,7 +9768,7 @@ "# multiply biodiversity layer * deforestation layer\n", "!gdal_translate -projwin -180.0 80.0 150.0 -20.0 -of GTiff ../../datasets/raw/qa_values/rubber_qa/output/lcia_psl_r_permanent_crops.tif ../../datasets/raw/qa_values/rubber_qa/output/lcia_psl_r_permanent_crops_extent.tif\n", "!gdal_calc.py --calc \"(A*B)\" --format GTiff --type Byte -A ../../datasets/raw/qa_values/rubber_qa/output/lcia_psl_r_permanent_crops_extent.tif --A_band 1 -B ../../datasets/processed/processed_files/hansen_loss_harvest_area_rubber.tif \\\n", - "--outfile ../../datasets/processed/processed_files/raw_biodiversity.tif\n" + "--outfile ../../datasets/processed/processed_files/raw_biodiversity.tif" ] }, { @@ -9839,20 +9903,16 @@ } ], "source": [ - "#zonal statistics for those areas\n", - "raster_raw_biodiversity = '../../datasets/processed/processed_files/raw_biodiversity.tif'\n", + "# zonal statistics for those 
areas\n", + "raster_raw_biodiversity = \"../../datasets/processed/processed_files/raw_biodiversity.tif\"\n", "\n", "raster_stats = []\n", "for i, row in df_rubber.iterrows():\n", - " geom = row['theGeom']\n", - " stat_ = zonal_stats(geom,\n", - " raster_raw_biodiversity,\n", - " stats='sum',\n", - " all_touched = True\n", - " )\n", - " raster_stats.append(stat_[0]['sum'])\n", - " \n", - "df_rubber['rs_raw_bio']=raster_stats\n", + " geom = row[\"theGeom\"]\n", + " stat_ = zonal_stats(geom, raster_raw_biodiversity, stats=\"sum\", all_touched=True)\n", + " raster_stats.append(stat_[0][\"sum\"])\n", + "\n", + "df_rubber[\"rs_raw_bio\"] = raster_stats\n", "df_rubber.head()" ] }, @@ -9863,7 +9923,7 @@ "metadata": {}, "outputs": [], "source": [ - "df_rubber.to_csv('../../datasets/raw/qa_values/rubber_qa/biodiversity_raster_v1.csv')" + "df_rubber.to_csv(\"../../datasets/raw/qa_values/rubber_qa/biodiversity_raster_v1.csv\")" ] }, { @@ -9931,7 +9991,7 @@ } ], "source": [ - "#generate a base total irrigated production raster\n", + "# generate a base total irrigated production raster\n", "!gdal_calc.py --calc \"(A>0)*0\" --format GTiff --type Float32 --NoDataValue -9999 -A ../../datasets/raw/qa_values/water_impact/data/Report50-WF-of-production-RasterFiles/Report50-WF-of-prodn-RasterFiles/wf_bltot_mmyr/wf_bltot_mmyr.tif --outfile ../../datasets/raw/qa_values/water_impact/data/Report50-WF-of-production-RasterFiles/Report50-WF-of-prodn-RasterFiles/wf_bltot_mmyr/total_irrigated_production.tif;" ] }, @@ -9994,7 +10054,7 @@ } ], "source": [ - "#unzip all irrigated production commodities\n", + "# unzip all irrigated production commodities\n", "!unzip -u ../../datasets/raw/qa_values/water_impact/spam2010v2r0_global_prod.geotiff.zip *_I.tif -d ../../datasets/raw/qa_values/water_impact" ] }, @@ -10657,21 +10717,17 @@ } ], "source": [ - "#raster stats in rubber df\n", + "# raster stats in rubber df\n", "\n", - "raster_path = '../../datasets/raw/qa_values/water_impact/data/Report50-WF-of-production-RasterFiles/Report50-WF-of-prodn-RasterFiles/wf_bltot_mmyr/wf_bltot_mmyr_t.tif'\n", + "raster_path = \"../../datasets/raw/qa_values/water_impact/data/Report50-WF-of-production-RasterFiles/Report50-WF-of-prodn-RasterFiles/wf_bltot_mmyr/wf_bltot_mmyr_t.tif\"\n", "raster_stats = []\n", "for i, row in df_rubber.iterrows():\n", - " geom = row['theGeom']\n", - " stat_ = zonal_stats(geom,\n", - " raster_path,\n", - " stats='sum',\n", - " all_touched = True\n", - " )\n", - " raster_stats.append(stat_[0]['sum'])\n", - " \n", - "df_rubber['rs_raw_water']=raster_stats\n", - "df_rubber.head()\n" + " geom = row[\"theGeom\"]\n", + " stat_ = zonal_stats(geom, raster_path, stats=\"sum\", all_touched=True)\n", + " raster_stats.append(stat_[0][\"sum\"])\n", + "\n", + "df_rubber[\"rs_raw_water\"] = raster_stats\n", + "df_rubber.head()" ] }, { @@ -10681,8 +10737,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export to csv\n", - "df_rubber.to_csv('../../datasets/raw/qa_values/rubber_qa/raw_water_raster_v1.csv')" + "# export to csv\n", + "df_rubber.to_csv(\"../../datasets/raw/qa_values/rubber_qa/raw_water_raster_v1.csv\")" ] }, { @@ -10714,7 +10770,9 @@ "LANGUAGE SQL;\n", "\"\"\"\n", "\n", - "SQL_SUM_H3_GRID_OVER_GEO_REGION = SQL_GET_H3_UNCOMPACT_GEO_REGION+\"\"\"\n", + "SQL_SUM_H3_GRID_OVER_GEO_REGION = (\n", + " SQL_GET_H3_UNCOMPACT_GEO_REGION\n", + " + \"\"\"\n", "CREATE OR REPLACE FUNCTION sum_h3_grid_over_georegion(\n", " geo_region_id uuid, \n", " h3_resolution int,\n", @@ -10744,6 +10802,7 @@ "$$\n", "LANGUAGE plpgsql;\n", 
"\"\"\"\n", + ")\n", "\n", "SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL = \"\"\"\n", "CREATE OR REPLACE FUNCTION get_h3_table_column_for_material(material_id uuid, h3_data_type material_to_h3_type_enum)\n", @@ -10759,7 +10818,10 @@ "LANGUAGE SQL;\n", "\"\"\"\n", "\n", - "SQL_SUM_DISTINCT_MATERIAL_OVER_GEO_REGION = SQL_SUM_H3_GRID_OVER_GEO_REGION+SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL+\"\"\"\n", + "SQL_SUM_DISTINCT_MATERIAL_OVER_GEO_REGION = (\n", + " SQL_SUM_H3_GRID_OVER_GEO_REGION\n", + " + SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL\n", + " + \"\"\"\n", "CREATE OR REPLACE FUNCTION sum_distinct_material_over_georegion(\n", " geo_region_id uuid, \n", " material_id uuid,\n", @@ -10786,6 +10848,7 @@ "$$\n", "LANGUAGE plpgsql;\n", "\"\"\"\n", + ")\n", "\n", "SQL_SUM_DISTINCT_WEIGHTED_DEFORESTATION_OVER_GEO_REGION = \"\"\"\n", "CREATE OR REPLACE FUNCTION sum_distinct_weighted_deforestation_over_georegion(\n", @@ -11366,11 +11429,11 @@ ], "source": [ "sourcing_records_distinct = pd.read_sql_query(\n", - " SQL_SUM_DISTINCT_MATERIAL_OVER_GEO_REGION \\\n", - " + SQL_SUM_DISTINCT_WEIGHTED_DEFORESTATION_OVER_GEO_REGION \\\n", - " + SQL_SUM_DISTINCT_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION \\\n", - " + SQL_SUM_DISTINCT_WEIGHTED_CARBON_OVER_GEO_REGION \\\n", - " + SQL_SUM_DISTINCT_WEIGHTED_WATER_OVER_GEO_REGION \\\n", + " SQL_SUM_DISTINCT_MATERIAL_OVER_GEO_REGION\n", + " + SQL_SUM_DISTINCT_WEIGHTED_DEFORESTATION_OVER_GEO_REGION\n", + " + SQL_SUM_DISTINCT_WEIGHTED_BIODIVERSITY_OVER_GEO_REGION\n", + " + SQL_SUM_DISTINCT_WEIGHTED_CARBON_OVER_GEO_REGION\n", + " + SQL_SUM_DISTINCT_WEIGHTED_WATER_OVER_GEO_REGION\n", " + \"\"\"\n", " SELECT\n", " sr.id,\n", @@ -11402,24 +11465,45 @@ " ) as sl\n", " on sr.\"sourcingLocationId\" = sl.id\n", " WHERE sl.\"materialId\"='0d7b1be5-dc86-47b8-ba3a-25190a275011'\n", - "\"\"\", conn)\n", + "\"\"\",\n", + " conn,\n", + ")\n", "\n", - "sourcing_records_distinct['land_per_ton'] = sourcing_records_distinct['harvested_area'] / sourcing_records_distinct['production']\n", + "sourcing_records_distinct[\"land_per_ton\"] = (\n", + " sourcing_records_distinct[\"harvested_area\"] / sourcing_records_distinct[\"production\"]\n", + ")\n", "\n", - "sourcing_records_distinct['deforestation_per_ha_landuse'] = sourcing_records_distinct['raw_deforestation'] / sourcing_records_distinct['harvested_area']\n", - "sourcing_records_distinct['bio_per_ha_landuse'] = sourcing_records_distinct['raw_biodiversity'] / sourcing_records_distinct['harvested_area']\n", - "sourcing_records_distinct['carbon_per_ha_landuse'] = sourcing_records_distinct['raw_carbon'] / sourcing_records_distinct['harvested_area']\n", - "sourcing_records_distinct['land_use'] = sourcing_records_distinct['land_per_ton'] * sourcing_records_distinct['tonnage']\n", + "sourcing_records_distinct[\"deforestation_per_ha_landuse\"] = (\n", + " sourcing_records_distinct[\"raw_deforestation\"] / sourcing_records_distinct[\"harvested_area\"]\n", + ")\n", + "sourcing_records_distinct[\"bio_per_ha_landuse\"] = (\n", + " sourcing_records_distinct[\"raw_biodiversity\"] / sourcing_records_distinct[\"harvested_area\"]\n", + ")\n", + "sourcing_records_distinct[\"carbon_per_ha_landuse\"] = (\n", + " sourcing_records_distinct[\"raw_carbon\"] / sourcing_records_distinct[\"harvested_area\"]\n", + ")\n", + "sourcing_records_distinct[\"land_use\"] = (\n", + " sourcing_records_distinct[\"land_per_ton\"] * sourcing_records_distinct[\"tonnage\"]\n", + ")\n", "\n", - "sourcing_records_distinct['deforestation'] = sourcing_records_distinct['deforestation_per_ha_landuse'] * 
sourcing_records_distinct['land_use']\n", - "sourcing_records_distinct['biodiversity_loss'] = sourcing_records_distinct['bio_per_ha_landuse'] * sourcing_records_distinct['land_use']\n", - "sourcing_records_distinct['carbon_loss'] = sourcing_records_distinct['carbon_per_ha_landuse'] * sourcing_records_distinct['land_use']\n", - "sourcing_records_distinct['water_impact'] = sourcing_records_distinct['raw_water'] * sourcing_records_distinct['tonnage']\n", + "sourcing_records_distinct[\"deforestation\"] = (\n", + " sourcing_records_distinct[\"deforestation_per_ha_landuse\"]\n", + " * sourcing_records_distinct[\"land_use\"]\n", + ")\n", + "sourcing_records_distinct[\"biodiversity_loss\"] = (\n", + " sourcing_records_distinct[\"bio_per_ha_landuse\"] * sourcing_records_distinct[\"land_use\"]\n", + ")\n", + "sourcing_records_distinct[\"carbon_loss\"] = (\n", + " sourcing_records_distinct[\"carbon_per_ha_landuse\"] * sourcing_records_distinct[\"land_use\"]\n", + ")\n", + "sourcing_records_distinct[\"water_impact\"] = (\n", + " sourcing_records_distinct[\"raw_water\"] * sourcing_records_distinct[\"tonnage\"]\n", + ")\n", "\n", "# Farm impact scaler = production\n", "# Land use change impact scaler = harvested_area\n", "\n", - "#sourcing_records.to_csv('test_impact_calc.csv')\n", + "# sourcing_records.to_csv('test_impact_calc.csv')\n", "\n", "sourcing_records_distinct" ] @@ -11431,8 +11515,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export to csv\n", - "sourcing_records_distinct.to_csv('../../datasets/raw/qa_values/rubber_qa/all_impacts_distinct.csv')" + "# export to csv\n", + "sourcing_records_distinct.to_csv(\"../../datasets/raw/qa_values/rubber_qa/all_impacts_distinct.csv\")" ] } ], diff --git a/data/notebooks/Lab/QA_Deforestation_and carbon_formulas.ipynb b/data/notebooks/Lab/QA_Deforestation_and carbon_formulas.ipynb index ee6e51c58..c99ef8ad7 100644 --- a/data/notebooks/Lab/QA_Deforestation_and carbon_formulas.ipynb +++ b/data/notebooks/Lab/QA_Deforestation_and carbon_formulas.ipynb @@ -54,10 +54,10 @@ "metadata": {}, "outputs": [], "source": [ - "#import libraries\n", + "# import libraries\n", "\n", - "from psycopg2.pool import ThreadedConnectionPool\n", - "import pandas as pd" + "import pandas as pd\n", + "from psycopg2.pool import ThreadedConnectionPool" ] }, { @@ -67,8 +67,8 @@ "metadata": {}, "outputs": [], "source": [ - "#set env\n", - "#set env\n", + "# set env\n", + "# set env\n", "## env file for gcs upload\n", "env_path = \".env\"\n", "with open(env_path) as f:\n", @@ -77,20 +77,20 @@ " env_key, _val = line.split(\"=\", 1)\n", " env_value = _val.split(\"\\n\")[0]\n", " env[env_key] = env_value\n", - " \n", - "#list(env.keys())\n", + "\n", + "# list(env.keys())\n", "\n", "# set conexion to local ddbb\n", "postgres_thread_pool = ThreadedConnectionPool(\n", - " 1, \n", + " 1,\n", " 50,\n", - " host=env['API_POSTGRES_HOST'],\n", - " port=env['API_POSTGRES_PORT'],\n", - " user=env['API_POSTGRES_USERNAME'],\n", - " password=env['API_POSTGRES_PASSWORD']\n", + " host=env[\"API_POSTGRES_HOST\"],\n", + " port=env[\"API_POSTGRES_PORT\"],\n", + " user=env[\"API_POSTGRES_USERNAME\"],\n", + " password=env[\"API_POSTGRES_PASSWORD\"],\n", ")\n", "\n", - "#get list of sourcing records to iterate:\n", + "# get list of sourcing records to iterate:\n", "conn = postgres_thread_pool.getconn()\n", "cursor = conn.cursor()" ] @@ -118,7 +118,8 @@ "metadata": {}, "outputs": [], "source": [ - "#define queries\n", + "# define queries\n", + "\n", "\n", "SQL_GET_H3_UNCOMPACT_GEO_REGION = \"\"\"\n", "CREATE 
OR REPLACE FUNCTION get_h3_uncompact_geo_region(geo_region_id uuid, h3_resolution int)\n", @@ -130,7 +131,7 @@ "LANGUAGE SQL;\n", "\"\"\"\n", "\n", - "#asuming that all the landindicators have the buffer version in the table\n", + "# asuming that all the landindicators have the buffer version in the table\n", "SQL_GET_H3_TABLE_COLUMN_FOR_LAND_INDICATORS = \"\"\"\n", "CREATE OR REPLACE FUNCTION get_h3_table_column_for_land_indicators(shortName text)\n", "RETURNS TABLE (h3_resolution int, h3_table_name varchar, h3_column_name varchar) AS\n", @@ -159,7 +160,9 @@ "LANGUAGE SQL;\n", "\"\"\"\n", "\n", - "SQL_SUM_H3_GRID_OVER_GEO_REGION = SQL_GET_H3_UNCOMPACT_GEO_REGION+\"\"\"\n", + "SQL_SUM_H3_GRID_OVER_GEO_REGION = (\n", + " SQL_GET_H3_UNCOMPACT_GEO_REGION\n", + " + \"\"\"\n", "CREATE OR REPLACE FUNCTION sum_h3_grid_over_georegion(\n", " geo_region_id uuid, \n", " h3_resolution int,\n", @@ -184,8 +187,12 @@ "$$\n", "LANGUAGE plpgsql;\n", "\"\"\"\n", + ")\n", "\n", - "SQL_SUM_MATERIAL_OVER_GEO_REGION = SQL_SUM_H3_GRID_OVER_GEO_REGION+SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL+\"\"\"\n", + "SQL_SUM_MATERIAL_OVER_GEO_REGION = (\n", + " SQL_SUM_H3_GRID_OVER_GEO_REGION\n", + " + SQL_GET_H3_TABLE_COLUMN_FOR_MATERIAL\n", + " + \"\"\"\n", "CREATE OR REPLACE FUNCTION sum_material_over_georegion(\n", " geo_region_id uuid, \n", " material_id uuid,\n", @@ -212,6 +219,7 @@ "$$\n", "LANGUAGE plpgsql;\n", "\"\"\"\n", + ")\n", "\n", "SQL_GET_ANNUAL_DEFORESTATION_OVER_GEO_REGION = \"\"\"\n", "CREATE OR REPLACE FUNCTION get_annual_deforestation_over_georegion(\n", @@ -438,10 +446,10 @@ ], "source": [ "sourcing_records = pd.read_sql_query(\n", - " SQL_SUM_MATERIAL_OVER_GEO_REGION \\\n", - " + SQL_GET_H3_TABLE_COLUMN_FOR_LAND_INDICATORS \\\n", - " + SQL_GET_ANNUAL_DEFORESTATION_OVER_GEO_REGION \\\n", - " + SQL_GET_ANNUAL_CARBON_EMISSIONS_OVER_GEO_REGION \\\n", + " SQL_SUM_MATERIAL_OVER_GEO_REGION\n", + " + SQL_GET_H3_TABLE_COLUMN_FOR_LAND_INDICATORS\n", + " + SQL_GET_ANNUAL_DEFORESTATION_OVER_GEO_REGION\n", + " + SQL_GET_ANNUAL_CARBON_EMISSIONS_OVER_GEO_REGION\n", " + \"\"\"\n", " SELECT \n", " sr.id,\n", @@ -467,32 +475,41 @@ " sourcing_location\n", " ) AS sl\n", " ON sr.\"sourcingLocationId\" = sl.\"id\"\n", - " \"\"\", conn)\n", - "\n", - "sourcing_records['land_per_ton'] = sourcing_records['harvest'] / sourcing_records['production']\n", - "sourcing_records['land_use'] = sourcing_records['land_per_ton'] * sourcing_records['tonnage']\n", + " \"\"\",\n", + " conn,\n", + ")\n", "\n", + "sourcing_records[\"land_per_ton\"] = sourcing_records[\"harvest\"] / sourcing_records[\"production\"]\n", + "sourcing_records[\"land_use\"] = sourcing_records[\"land_per_ton\"] * sourcing_records[\"tonnage\"]\n", "\n", "\n", - "##Assuming that all forest loss is due to human land use (crop/pasture/managed forest/urban) and all human land use within 50km of the deforested pixel is equally responsible: \n", - "#What is the average number of hectares of forest lost per hectare of cropland in the local area/jurisdiction?\n", - "#NOTE: Should we do this with buffer or withouth?\n", + "##Assuming that all forest loss is due to human land use (crop/pasture/managed forest/urban) and all human land use within 50km of the deforested pixel is equally responsible:\n", + "# What is the average number of hectares of forest lost per hectare of cropland in the local area/jurisdiction?\n", + "# NOTE: Should we do this with buffer or withouth?\n", "\n", - "#DEFORESTATION:\n", + "# DEFORESTATION:\n", "# 1. 
calculate the total hectares of land deforested - THIS IS ALREADY ACCOUNTED ON THE ANNUAL DEFOREDTATION IN THE GEOREGION\n", "# 2. Calculate total hectares of human land use?? Why human land use? FOR NOW I'LL USE THE TOTAL HECTARES OF CROP IN MY GEOREGION\n", - "# 3. Divide the total hectaes of land deforested/harvest area to get the deforestation rate per hectare of land use \n", + "# 3. Divide the total hectaes of land deforested/harvest area to get the deforestation rate per hectare of land use\n", "# 4. Multiply that by the land use impact of my material\n", "\n", - "sourcing_records['buffer_deforestation_per_ha_land_use'] = sourcing_records['def_annual'] / sourcing_records['harvest'] #change this harvest area by the total human area or the total pasture+crop area in georegion?\n", - "sourcing_records['deforestation_risk'] = sourcing_records['buffer_deforestation_per_ha_land_use'] * sourcing_records['land_use']\n", + "sourcing_records[\"buffer_deforestation_per_ha_land_use\"] = (\n", + " sourcing_records[\"def_annual\"] / sourcing_records[\"harvest\"]\n", + ") # change this harvest area by the total human area or the total pasture+crop area in georegion?\n", + "sourcing_records[\"deforestation_risk\"] = (\n", + " sourcing_records[\"buffer_deforestation_per_ha_land_use\"] * sourcing_records[\"land_use\"]\n", + ")\n", "\n", - "#CARBON:\n", + "# CARBON:\n", "# 1. Calculate the total carbon emissions in georegion\n", "# 2. Calculate the total carbon emissions per hectares of land use\n", "# 3. Multiply that by the land use impact\n", - "sourcing_records['buffer_emissions_per_ha_land_use'] = sourcing_records['emissions_annual'] / sourcing_records['harvest'] #change this harvest area by the total human area or the total pasture+crop area in georegion?\n", - "sourcing_records['emissions_risk'] = sourcing_records['buffer_emissions_per_ha_land_use'] * sourcing_records['land_use']\n", + "sourcing_records[\"buffer_emissions_per_ha_land_use\"] = (\n", + " sourcing_records[\"emissions_annual\"] / sourcing_records[\"harvest\"]\n", + ") # change this harvest area by the total human area or the total pasture+crop area in georegion?\n", + "sourcing_records[\"emissions_risk\"] = (\n", + " sourcing_records[\"buffer_emissions_per_ha_land_use\"] * sourcing_records[\"land_use\"]\n", + ")\n", "\n", "\n", "sourcing_records.head()" @@ -505,8 +522,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export to csv\n", - "sourcing_records.to_csv('../../datasets/raw/TRASE_data/carbon_deforestation_updated_values.csv')" + "# export to csv\n", + "sourcing_records.to_csv(\"../../datasets/raw/TRASE_data/carbon_deforestation_updated_values.csv\")" ] }, { diff --git a/data/notebooks/Lab/QA_h3_vs_raster_calculations.ipynb b/data/notebooks/Lab/QA_h3_vs_raster_calculations.ipynb index ee31b9b3b..44f0faa1a 100644 --- a/data/notebooks/Lab/QA_h3_vs_raster_calculations.ipynb +++ b/data/notebooks/Lab/QA_h3_vs_raster_calculations.ipynb @@ -28,26 +28,17 @@ "metadata": {}, "outputs": [], "source": [ - "#import libraries\n", + "# import libraries\n", "\n", - "import numpy as np\n", "import geopandas as gpd\n", - "import pandas as pd\n", - "\n", "import h3\n", - "import h3pandas\n", - "from h3ronpy import raster\n", + "import numpy as np\n", "import rasterio as rio\n", - "from rasterio import mask\n", - "from rasterstats import zonal_stats #gen_zonal_stats, gen_point_query, \n", + "from h3ronpy import raster\n", + "from rasterstats import zonal_stats # gen_zonal_stats, gen_point_query,\n", "\n", - "from matplotlib import pyplot\n", 
"%matplotlib inline\n", "\n", - "import folium\n", - "\n", - "\n", - "import argparse\n", "\n", "import cv2\n", "import numpy as np" @@ -105,10 +96,10 @@ }, "outputs": [], "source": [ - "#open mill locations\n", + "# open mill locations\n", "\n", - "#gdf = gpd.read_file(f\"{FILE_DIR}/satelligence data/AcehMills_indicators.gpkg\")\n", - "#gdf.head()" + "# gdf = gpd.read_file(f\"{FILE_DIR}/satelligence data/AcehMills_indicators.gpkg\")\n", + "# gdf.head()" ] }, { @@ -118,38 +109,38 @@ "metadata": {}, "outputs": [], "source": [ - "#buffer = 50000\n", + "# buffer = 50000\n", "\n", - "def get_buffer(gdf, buffer, save=True, save_path='./'):\n", - " \n", - " if gdf.crs and gdf.crs != 'EPSG:3857':\n", - " print('Reprojecting to EPSG:3857')\n", - " #reproject\n", + "\n", + "def get_buffer(gdf, buffer, save=True, save_path=\"./\"):\n", + " if gdf.crs and gdf.crs != \"EPSG:3857\":\n", + " print(\"Reprojecting to EPSG:3857\")\n", + " # reproject\n", " gdf_reprojected = gdf.to_crs(\"EPSG:3857\")\n", " else:\n", - " print('Set a valid projection to the vector layer')\n", + " print(\"Set a valid projection to the vector layer\")\n", "\n", " gdf_buffer = gdf_reprojected.buffer(buffer)\n", "\n", - " gdf_buffer_reprojected = gdf_buffer.to_crs('EPSG:4326')\n", + " gdf_buffer_reprojected = gdf_buffer.to_crs(\"EPSG:4326\")\n", " if save:\n", " gdf_buffer_reprojected.to_file(save_path)\n", " return gdf_buffer_reprojected\n", "\n", + "\n", "def get_buffer_stats(\n", " raster_path,\n", " vector_path,\n", " buffer=50000,\n", - " stat_='sum',\n", - " all_touched= True,\n", - " column_name ='estimated_val'\n", - " ):\n", - " \n", + " stat_=\"sum\",\n", + " all_touched=True,\n", + " column_name=\"estimated_val\",\n", + "):\n", " \"\"\"\n", " Function to obtain raster stats in abuffer geometry.\n", - " The function calculates first the buffer from the point vector file and then calculates the \n", + " The function calculates first the buffer from the point vector file and then calculates the\n", " raster stadistics inside the geometry.\n", - " \n", + "\n", " Inputs\n", " ------------------------\n", " raster_path: Raster path for retrieving the statdistics in EPSG:4326 projection.\n", @@ -157,94 +148,96 @@ " buffer: Radio distance in meters for computting the buffer geometry.\n", " stat_: Stadistics to compute using the zonal stadistics.\n", " all_touched: condition for the zonal stadistics. 
Used True as default.\n", - " \n", + "\n", " Output\n", " -----------------------\n", - " \n", + "\n", " gdf with stadistics\n", - " \n", + "\n", " \"\"\"\n", " gdf = gpd.read_file(f\"{vector_path}\")\n", - " \n", - " gdf_buffer = get_buffer(gdf, buffer, save=False, save_path='./')\n", - " \n", - " #if gdf.crs and gdf.crs != 'EPSG:3857':\n", + "\n", + " gdf_buffer = get_buffer(gdf, buffer, save=False, save_path=\"./\")\n", + "\n", + " # if gdf.crs and gdf.crs != 'EPSG:3857':\n", " # print('Reprojecting to EPSG:3857')\n", " # #reproject\n", " # gdf_reprojected = gdf.to_crs(\"EPSG:3857\")\n", - " #else:\n", + " # else:\n", " # print('Set a valid projection to the vector layer')\n", "\n", " ##get buffer\n", "\n", - " #gdf_buffer = gdf_reprojected.buffer(buffer)\n", + " # gdf_buffer = gdf_reprojected.buffer(buffer)\n", "\n", - " #reproject back to EPSG4326 as raster data should be provided in this projection\n", + " # reproject back to EPSG4326 as raster data should be provided in this projection\n", "\n", - " #gdf_buffer_reprojected = gdf_buffer.to_crs('EPSG:4326')\n", + " # gdf_buffer_reprojected = gdf_buffer.to_crs('EPSG:4326')\n", "\n", " stadistics = []\n", " for geom in gdf_buffer:\n", - " stats = zonal_stats(\n", - " geom,\n", - " raster_path,\n", - " stats = stat_,\n", - " all_touched = all_touched\n", - " )\n", - " stat_sum = stats[0]['sum']\n", + " stats = zonal_stats(geom, raster_path, stats=stat_, all_touched=all_touched)\n", + " stat_sum = stats[0][\"sum\"]\n", " stadistics.append(stat_sum)\n", - " #add stats in dataframe\n", - " gdf[column_name]=stadistics\n", + " # add stats in dataframe\n", + " gdf[column_name] = stadistics\n", " return gdf\n", "\n", + "\n", "def convert_rasterToH3(raster_path, resolution=6):\n", " with rio.open(raster_path) as src:\n", - " gdf = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=resolution, nodata_value=src.profile['nodata'], compacted=False)\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1),\n", + " src.transform,\n", + " h3_resolution=resolution,\n", + " nodata_value=src.profile[\"nodata\"],\n", + " compacted=False,\n", + " )\n", "\n", - " gdf.plot('value')\n", - " gdf['h3index'] = gdf['h3index'].apply(hex)\n", + " gdf.plot(\"value\")\n", + " gdf[\"h3index\"] = gdf[\"h3index\"].apply(hex)\n", " return gdf\n", - " \n", - "def get_h3_vector_statistics(raster_path, vector_path, column='estimated', resolution=6):\n", + "\n", + "\n", + "def get_h3_vector_statistics(raster_path, vector_path, column=\"estimated\", resolution=6):\n", " \"\"\"\n", - " Funtion to convert raster to h3 for a given resolution. The same function will obtain the sum of \n", + " Funtion to convert raster to h3 for a given resolution. 
The same function will obtain the sum of\n", " all the values for a given geometry.\n", - " \n", + "\n", " Inputs\n", " ---------------\n", " raster_path: Path to raster layer to convert to a given h3 resolution.\n", " vector_path: Path to vector layer with geometris to obtain the h3 zonal stadistics.\n", " column: name of the output column with the zonal stadistics.\n", " resolution: H3 resolution\n", - " \n", + "\n", " Output\n", " --------------\n", " gdf: GeoDataFrame with zonal statidtics\n", " \"\"\"\n", - " #with rio.open(raster_path) as src:\n", + " # with rio.open(raster_path) as src:\n", " # gdf = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=resolution, nodata_value=src.profile['nodata'], compacted=False)\n", - "#\n", + " #\n", " # gdf.plot('value')\n", " # gdf['h3index'] = gdf['h3index'].apply(hex)\n", " gdf = convert_rasterToH3(raster_path, resolution=resolution)\n", "\n", " gdf_vector = gpd.read_file(vector_path)\n", - " #clean_gdf = gdf_vector[['gfw_fid',column,'geometry']]\n", - " \n", + " # clean_gdf = gdf_vector[['gfw_fid',column,'geometry']]\n", + "\n", " _sum_calculated = []\n", " for i, row in gdf_vector.iterrows():\n", - " filtered_gdf = gdf_vector[i:i+1]\n", - " #convert to h3\n", + " filtered_gdf = gdf_vector[i : i + 1]\n", + " # convert to h3\n", " h3_gdf = filtered_gdf.h3.polyfill_resample(resolution)\n", - " #h3_gdf = h3_gdf.reset_index().rename(columns={'h3_polyfill':'h3index'})\n", - " h3index_list = [f'0x{h3index}' for h3index in h3_gdf.index]\n", - " #filter gdf by list and get value\n", - " _sum = gdf[gdf['h3index'].isin(h3index_list)]['value'].sum()\n", + " # h3_gdf = h3_gdf.reset_index().rename(columns={'h3_polyfill':'h3index'})\n", + " h3index_list = [f\"0x{h3index}\" for h3index in h3_gdf.index]\n", + " # filter gdf by list and get value\n", + " _sum = gdf[gdf[\"h3index\"].isin(h3index_list)][\"value\"].sum()\n", " _sum_calculated.append(_sum)\n", - " \n", + "\n", " gdf_vector[column] = _sum_calculated\n", - " return gdf_vector\n", - "\n" + " return gdf_vector" ] }, { @@ -254,9 +247,11 @@ "metadata": {}, "outputs": [], "source": [ - "#read raster/tif file\n", + "# read raster/tif file\n", "mills = f\"{FILE_DIR}/satelligence data/AcehMills_indicators.gpkg\"\n", - "def_tif = f\"{FILE_DIR}/satelligence data/rasters_indicators/Deforestation_IDN_2021-01-01-2022-01-01.tif\"\n", + "def_tif = (\n", + " f\"{FILE_DIR}/satelligence data/rasters_indicators/Deforestation_IDN_2021-01-01-2022-01-01.tif\"\n", + ")\n", "carb_tif = f\"{FILE_DIR}/satelligence data/rasters_indicators/AboveGroundBiomass_GLO_2001-01-01-2002-01-01.tif\"" ] }, @@ -373,14 +368,19 @@ } ], "source": [ - "area_h3_gdf = convert_rasterToH3(\"../../datasets/raw/h3_raster_QA_calculations/h3_area_correction/8_Areakm_clip.tif\", resolution=6)\n", + "area_h3_gdf = convert_rasterToH3(\n", + " \"../../datasets/raw/h3_raster_QA_calculations/h3_area_correction/8_Areakm_clip.tif\",\n", + " resolution=6,\n", + ")\n", "gdf_buffer50km = gpd.read_file(f\"{FILE_DIR}/satelligence data/AcehMills_indicators_50kmbuffer.shp\")\n", "\n", - "area_h3_gdf['h3index'] = [row['h3index'].split('x')[1] for i,row in area_h3_gdf.iterrows()]\n", - "area_h3_gdf['h3_area'] = [h3.cell_area(row['h3index'], unit='km^2') for i,row in area_h3_gdf.iterrows()]\n", - "area_h3_gdf = area_h3_gdf.rename(columns={'value':'raster_area'})\n", - "area_h3_gdf['ratio'] = area_h3_gdf['h3_area']/area_h3_gdf['raster_area']\n", - "area_h3_gdf.head()\n" + "area_h3_gdf[\"h3index\"] = [row[\"h3index\"].split(\"x\")[1] for i, row in 
area_h3_gdf.iterrows()]\n", + "area_h3_gdf[\"h3_area\"] = [\n", + " h3.cell_area(row[\"h3index\"], unit=\"km^2\") for i, row in area_h3_gdf.iterrows()\n", + "]\n", + "area_h3_gdf = area_h3_gdf.rename(columns={\"value\": \"raster_area\"})\n", + "area_h3_gdf[\"ratio\"] = area_h3_gdf[\"h3_area\"] / area_h3_gdf[\"raster_area\"]\n", + "area_h3_gdf.head()" ] }, { @@ -390,29 +390,34 @@ "metadata": {}, "outputs": [], "source": [ - "def get_zonal_stats_correction_factor(raster_path='./',\n", - " corrected_area_gdf=area_h3_gdf,\n", - " resolution=6,\n", - " buffer_gdf=gdf_buffer50km,\n", - " formula=1):\n", - " gdf = convert_rasterToH3(raster_path,resolution=resolution)\n", - " gdf['h3index'] = [row['h3index'].split('x')[1] for i,row in gdf.iterrows()]\n", - " gdf['h3_ratio'] = [list(corrected_area_gdf[corrected_area_gdf['h3index']==row['h3index']]['ratio'])[0] for i, row in gdf.iterrows()]\n", - " \n", - " gdf['corrected_value'] = gdf['value']*gdf['h3_ratio']*formula\n", - " \n", + "def get_zonal_stats_correction_factor(\n", + " raster_path=\"./\",\n", + " corrected_area_gdf=area_h3_gdf,\n", + " resolution=6,\n", + " buffer_gdf=gdf_buffer50km,\n", + " formula=1,\n", + "):\n", + " gdf = convert_rasterToH3(raster_path, resolution=resolution)\n", + " gdf[\"h3index\"] = [row[\"h3index\"].split(\"x\")[1] for i, row in gdf.iterrows()]\n", + " gdf[\"h3_ratio\"] = [\n", + " list(corrected_area_gdf[corrected_area_gdf[\"h3index\"] == row[\"h3index\"]][\"ratio\"])[0]\n", + " for i, row in gdf.iterrows()\n", + " ]\n", + "\n", + " gdf[\"corrected_value\"] = gdf[\"value\"] * gdf[\"h3_ratio\"] * formula\n", + "\n", " geom_sum_ha = []\n", " for i, row in buffer_gdf.iterrows():\n", - " filtered_gdf = buffer_gdf[i:i+1]\n", + " filtered_gdf = buffer_gdf[i : i + 1]\n", " h3_gdf = filtered_gdf.h3.polyfill_resample(resolution)\n", "\n", " h3index_list = list(h3_gdf.index)\n", - " gdf_filtered = gdf[gdf['h3index'].isin(h3index_list)]\n", - " sum_ = gdf_filtered['corrected_value'].sum()\n", + " gdf_filtered = gdf[gdf[\"h3index\"].isin(h3index_list)]\n", + " sum_ = gdf_filtered[\"corrected_value\"].sum()\n", " geom_sum_ha.append(sum_)\n", - " \n", - " buffer_gdf['sum'] = geom_sum_ha\n", - " \n", + "\n", + " buffer_gdf[\"sum\"] = geom_sum_ha\n", + "\n", " return buffer_gdf" ] }, @@ -461,7 +466,7 @@ } ], "source": [ - "#calculate deforested area\n", + "# calculate deforested area\n", "!gdal_calc.py --calc \"A*6.69019042035408517*6.69019042035408517* 0.0001\" --format GTiff --type Float32 --NoDataValue 0.0 -A \"../../datasets/raw/h3_raster_QA_calculations/satelligence data/rasters_indicators/Deforestation_IDN_2021-01-01-2022-01-01.tif\" --A_band 1 --outfile \"../../datasets/raw/h3_raster_QA_calculations/preprocessed/Deforestation_IDN_2021-01-01-2022-01-01_area_ha.tif\"" ] }, @@ -472,10 +477,10 @@ "metadata": {}, "outputs": [], "source": [ - "#preprocess carbon datasets before computing the carbon emissions\n", + "# preprocess carbon datasets before computing the carbon emissions\n", "\n", "##1. 
downsample carbon layer to same resolution as deforestation area file\n", - "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -dstnodata 0.0 -tr 6e-05 6e-05 -r near -q -te 94.99998 2.1 98.29998 6.10002 -te_srs EPSG:4326 -multi -of GTiff \"../../datasets/raw/h3_raster_QA_calculations/satelligence data/rasters_indicators/AboveGroundBiomass_GLO_2001-01-01-2002-01-01.tif\" '../../datasets/raw/h3_raster_QA_calculations/preprocessed/AboveGroundBiomass_GLO_2001-01-01-2002-01-01_downsample.tif'\n" + "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -dstnodata 0.0 -tr 6e-05 6e-05 -r near -q -te 94.99998 2.1 98.29998 6.10002 -te_srs EPSG:4326 -multi -of GTiff \"../../datasets/raw/h3_raster_QA_calculations/satelligence data/rasters_indicators/AboveGroundBiomass_GLO_2001-01-01-2002-01-01.tif\" '../../datasets/raw/h3_raster_QA_calculations/preprocessed/AboveGroundBiomass_GLO_2001-01-01-2002-01-01_downsample.tif'" ] }, { @@ -485,7 +490,7 @@ "metadata": {}, "outputs": [], "source": [ - "#get the downsampled carbon layer\n", + "# get the downsampled carbon layer\n", "!gdal_calc.py --calc \"A*B\" --format GTiff --type Float32 --q -A '../../datasets/raw/h3_raster_QA_calculations/preprocessed/AboveGroundBiomass_GLO_2001-01-01-2002-01-01_downsample.tif' --A_band 1 -B \"../../datasets/raw/h3_raster_QA_calculations/preprocessed/Deforestation_IDN_2021-01-01-2022-01-01_area_ha.tif\" --outfile '../../datasets/raw/h3_raster_QA_calculations/preprocessed/carbon_loss_T_downsample.tif'" ] }, @@ -727,16 +732,25 @@ ], "source": [ "%%time\n", - "carb_tif_downsampled = '../../datasets/raw/h3_raster_QA_calculations/preprocessed/carbon_loss_T_downsample.tif'\n", - "#gdf = get_buffer_stats(def_tif, mills, buffer=50000, stat_='sum', all_touched= True, column_name ='def_true')\n", - "#gdf = get_buffer_stats(def_tif, mills, buffer=50000, stat_='sum', all_touched= False, column_name ='def_false')\n", - "gdf = get_buffer_stats(carb_tif_downsampled, mills, buffer=50000, stat_='sum', all_touched= False, column_name ='carb_false')\n", + "carb_tif_downsampled = (\n", + " \"../../datasets/raw/h3_raster_QA_calculations/preprocessed/carbon_loss_T_downsample.tif\"\n", + ")\n", + "# gdf = get_buffer_stats(def_tif, mills, buffer=50000, stat_='sum', all_touched= True, column_name ='def_true')\n", + "# gdf = get_buffer_stats(def_tif, mills, buffer=50000, stat_='sum', all_touched= False, column_name ='def_false')\n", + "gdf = get_buffer_stats(\n", + " carb_tif_downsampled,\n", + " mills,\n", + " buffer=50000,\n", + " stat_=\"sum\",\n", + " all_touched=False,\n", + " column_name=\"carb_false\",\n", + ")\n", "\n", - "#convert to area deforested\n", - "#gdf['def_true'] = gdf['def_true']*6.69019042035408517*6.69019042035408517* 0.0001\n", - "#gdf['def_false'] = gdf['def_false']*6.69019042035408517*6.69019042035408517* 0.0001\n", + "# convert to area deforested\n", + "# gdf['def_true'] = gdf['def_true']*6.69019042035408517*6.69019042035408517* 0.0001\n", + "# gdf['def_false'] = gdf['def_false']*6.69019042035408517*6.69019042035408517* 0.0001\n", "\n", - "gdf.head()\n" + "gdf.head()" ] }, { @@ -746,8 +760,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export to csv\n", - "gdf = gdf[['gfw_fid','mill_name','deforestation', 'carbon','carb_false','geometry']]\n", + "# export to csv\n", + "gdf = gdf[[\"gfw_fid\", \"mill_name\", \"deforestation\", \"carbon\", \"carb_false\", \"geometry\"]]\n", "gdf.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/preprocessed/carbon_all_touched_false.csv\")" ] }, @@ -883,14 +897,15 @@ } ], "source": [ - "gdf_filtered = 
gdf[['gfw_fid','deforestation', 'carbon', 'geometry' ]]\n", + "gdf_filtered = gdf[[\"gfw_fid\", \"deforestation\", \"carbon\", \"geometry\"]]\n", "\n", - "#get vector buffer for computing the h3 statistics\n", + "# get vector buffer for computing the h3 statistics\n", "gdf_filtered_buffer = get_buffer(\n", " gdf_filtered,\n", " 50000,\n", " save=True,\n", - " save_path=f\"{FILE_DIR}/satelligence data/AcehMills_indicators_50kmbuffer.shp\")\n", + " save_path=f\"{FILE_DIR}/satelligence data/AcehMills_indicators_50kmbuffer.shp\",\n", + ")\n", "gdf_filtered_buffer.head()" ] }, @@ -991,10 +1006,13 @@ "gdf_count_h3 = get_h3_vector_statistics(\n", " \"../../datasets/raw/h3_raster_QA_calculations/satelligence data/rasters_indicators/Deforestation_IDN_2021-01-01-2022-01-01_downsample_count.tif\",\n", " f\"{FILE_DIR}/satelligence data/AcehMills_indicators_50kmbuffer.shp\",\n", - " column='def_estimated_count',\n", - " resolution=6)\n", - "#save\n", - "gdf_count_h3.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/deforestation_count.csv\")\n", + " column=\"def_estimated_count\",\n", + " resolution=6,\n", + ")\n", + "# save\n", + "gdf_count_h3.to_csv(\n", + " \"../../datasets/raw/h3_raster_QA_calculations/statistics/deforestation_count.csv\"\n", + ")\n", "gdf_count_h3.head()" ] }, @@ -1005,13 +1023,13 @@ "metadata": {}, "outputs": [], "source": [ - "#gdf_buffer50km = gpd.read_file(f\"{FILE_DIR}/satelligence data/AcehMills_indicators_50kmbuffer.shp\")\n", + "# gdf_buffer50km = gpd.read_file(f\"{FILE_DIR}/satelligence data/AcehMills_indicators_50kmbuffer.shp\")\n", "#\n", - "#geom_sum_ha = []\n", - "#for i, row in gdf_buffer50km.iterrows():\n", + "# geom_sum_ha = []\n", + "# for i, row in gdf_buffer50km.iterrows():\n", "# filtered_gdf = gdf_buffer50km[i:i+1]\n", "# h3_gdf = filtered_gdf.h3.polyfill_resample(6)\n", - "# \n", + "#\n", "# #h3index_list = [f'0x{h3index}' for h3index in h3_gdf.index]\n", "# h3_list = h3_gdf.index\n", "# hex_area = []\n", @@ -1022,7 +1040,7 @@ "# #h3_area_sum = sum(hex_area)\n", "# #geom_sum_ha.append(h3_area_sum)\n", "# break\n", - "#h3_gdf.head()" + "# h3_gdf.head()" ] }, { @@ -1145,9 +1163,10 @@ "gdf_sum_h3 = get_h3_vector_statistics(\n", " \"../../datasets/raw/h3_raster_QA_calculations/preprocessed/Deforestation_IDN_2021-01-01-2022-01-01_area_ha_sum.tif\",\n", " f\"{FILE_DIR}/satelligence data/AcehMills_indicators_50kmbuffer.shp\",\n", - " column='def_estimated_sum',\n", - " resolution=6)\n", - "#save\n", + " column=\"def_estimated_sum\",\n", + " resolution=6,\n", + ")\n", + "# save\n", "gdf_sum_h3.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/deforestation_sum.csv\")\n", "gdf_sum_h3.head()" ] @@ -1173,7 +1192,7 @@ "metadata": {}, "outputs": [], "source": [ - "#clip area raster by Ache, Indonesia extension\n", + "# clip area raster by Ache, Indonesia extension\n", "!gdal_translate -projwin 90.0 10.0 100.0 0.0 -q -a_nodata 0.0 -of GTiff \"../../datasets/raw/h3_raster_QA_calculations/h3_area_correction/8_Areakm.tif\" \"../../datasets/raw/h3_raster_QA_calculations/h3_area_correction/8_Areakm_clip.tif\"" ] }, @@ -1271,11 +1290,13 @@ } ], "source": [ - "def_count_h3_gdf = get_zonal_stats_correction_factor(raster_path=\"../../datasets/raw/h3_raster_QA_calculations/satelligence data/rasters_indicators/Deforestation_IDN_2021-01-01-2022-01-01_downsample_count.tif\",\n", - " corrected_area_gdf=area_h3_gdf,\n", - " resolution=6,\n", - " buffer_gdf=gdf_buffer50km,\n", - " formula=6.69019042035408*6.69019042035408* 0.0001)\n", + "def_count_h3_gdf = 
get_zonal_stats_correction_factor(\n", + " raster_path=\"../../datasets/raw/h3_raster_QA_calculations/satelligence data/rasters_indicators/Deforestation_IDN_2021-01-01-2022-01-01_downsample_count.tif\",\n", + " corrected_area_gdf=area_h3_gdf,\n", + " resolution=6,\n", + " buffer_gdf=gdf_buffer50km,\n", + " formula=6.69019042035408 * 6.69019042035408 * 0.0001,\n", + ")\n", "def_count_h3_gdf.head()" ] }, @@ -1405,11 +1426,14 @@ ], "source": [ "carbon_gdf_no_area = get_h3_vector_statistics(\n", - " '../../datasets/raw/h3_raster_QA_calculations/preprocessed/carbon_loss_T_downsample_sum_v2.tif',\n", + " \"../../datasets/raw/h3_raster_QA_calculations/preprocessed/carbon_loss_T_downsample_sum_v2.tif\",\n", " f\"{FILE_DIR}/satelligence data/AcehMills_indicators_50kmbuffer.shp\",\n", - " column='carb_estimated_sum',\n", - " resolution=6)\n", - "carbon_gdf_no_area.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/carbon_loss_T_downsample_sum_no_corrected_h3.csv\")\n", + " column=\"carb_estimated_sum\",\n", + " resolution=6,\n", + ")\n", + "carbon_gdf_no_area.to_csv(\n", + " \"../../datasets/raw/h3_raster_QA_calculations/statistics/carbon_loss_T_downsample_sum_no_corrected_h3.csv\"\n", + ")\n", "carbon_gdf_no_area.head()" ] }, @@ -1508,7 +1532,8 @@ ], "source": [ "carbon_gdf = get_zonal_stats_correction_factor(\n", - "'../../datasets/raw/h3_raster_QA_calculations/preprocessed/carbon_loss_T_downsample_sum_v2.tif')\n", + " \"../../datasets/raw/h3_raster_QA_calculations/preprocessed/carbon_loss_T_downsample_sum_v2.tif\"\n", + ")\n", "carbon_gdf.head()" ] }, @@ -1519,7 +1544,9 @@ "metadata": {}, "outputs": [], "source": [ - "carbon_gdf.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/carbon_loss_T_downsample_sum.csv\")" + "carbon_gdf.to_csv(\n", + " \"../../datasets/raw/h3_raster_QA_calculations/statistics/carbon_loss_T_downsample_sum.csv\"\n", + ")" ] }, { @@ -1543,15 +1570,15 @@ "metadata": {}, "outputs": [], "source": [ - "#get sum of harvest area\n", - "#%%time\n", + "# get sum of harvest area\n", + "# %%time\n", "oilp_harvest_area = f\"{FILE_DIR}/core_indicators/materials/palm_oil_harvest_area_ha_clip_v3.tif\"\n", "oilp_production = f\"{FILE_DIR}/core_indicators/materials/palm_oil_production_t_clip_v3.tif\"\n", - "#mills\n", - "#gdf = get_buffer_stats(oilp_production, mills, stat_='sum', all_touched= True, column_name ='production_true')\n", - "#gdf.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/production_true.csv\")\n", + "# mills\n", + "# gdf = get_buffer_stats(oilp_production, mills, stat_='sum', all_touched= True, column_name ='production_true')\n", + "# gdf.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/production_true.csv\")\n", "##\n", - "#gdf.head()" + "# gdf.head()" ] }, { @@ -1648,14 +1675,16 @@ } ], "source": [ - "#get area corrected zonal statstics\n", - "h3_gdf = get_zonal_stats_correction_factor(raster_path=oilp_harvest_area,\n", - " corrected_area_gdf=area_h3_gdf,\n", - " resolution=6,\n", - " buffer_gdf=gdf_buffer50km,\n", - " formula=1)\n", + "# get area corrected zonal statstics\n", + "h3_gdf = get_zonal_stats_correction_factor(\n", + " raster_path=oilp_harvest_area,\n", + " corrected_area_gdf=area_h3_gdf,\n", + " resolution=6,\n", + " buffer_gdf=gdf_buffer50km,\n", + " formula=1,\n", + ")\n", "h3_gdf.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/oilp_harvest_area_ha.csv\")\n", - "#h3_gdf.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/oilp_production_p.csv\")\n", + "# 
h3_gdf.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/oilp_production_p.csv\")\n", "h3_gdf.head()" ] }, @@ -1753,15 +1782,18 @@ } ], "source": [ - "#compare against the no area corrected\n", + "# compare against the no area corrected\n", "\n", "h3_gdf = get_h3_vector_statistics(\n", " oilp_production,\n", " f\"{FILE_DIR}/satelligence data/AcehMills_indicators_50kmbuffer.shp\",\n", - " column='h3_estimated_sum',\n", - " resolution=6)\n", - "#h3_gdf.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/oilp_harvest_area_no_corrected_h3.csv\")\n", - "h3_gdf.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/oilp_prod_no_corrected_h3.csv\")\n", + " column=\"h3_estimated_sum\",\n", + " resolution=6,\n", + ")\n", + "# h3_gdf.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/oilp_harvest_area_no_corrected_h3.csv\")\n", + "h3_gdf.to_csv(\n", + " \"../../datasets/raw/h3_raster_QA_calculations/statistics/oilp_prod_no_corrected_h3.csv\"\n", + ")\n", "h3_gdf.head()" ] }, @@ -1802,12 +1834,11 @@ " transform = src.transform\n", " arr = src.read(1)\n", " orig_crs = src.crs\n", - " \n", - "#if orig_crs.is_geographic:\n", + "\n", + "# if orig_crs.is_geographic:\n", "# y_size_km = -transform[4] / 1000\n", - "# \n", - "#radius_in_pixels = int(radius / y_size_km)\n", - "\n" + "#\n", + "# radius_in_pixels = int(radius / y_size_km)" ] }, { @@ -1817,9 +1848,8 @@ "metadata": {}, "outputs": [], "source": [ - "#as I'm having issues with memory, I'm going to use a randon kernel as I just want to test the difference between the raster and h3 calculations\n", - "kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(10000,\n", - " 10000))" + "# as I'm having issues with memory, I'm going to use a randon kernel as I just want to test the difference between the raster and h3 calculations\n", + "kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(10000, 10000))" ] }, { @@ -2079,18 +2109,18 @@ } ], "source": [ - "#get zonal statistics in geometries with raster with kernnel corrected and no corrected by area\n", + "# get zonal statistics in geometries with raster with kernnel corrected and no corrected by area\n", "\n", - "#no corrected by area\n", + "# no corrected by area\n", "\n", "gdf_lg_def = get_buffer_stats(\n", " \"../../datasets/raw/h3_raster_QA_calculations/preprocessed/def_area_kernnel.tif\",\n", " mills,\n", " buffer=50000,\n", - " stat_='sum',\n", - " all_touched= True,\n", - " column_name ='lg_def_val_noarea'\n", - " )\n", + " stat_=\"sum\",\n", + " all_touched=True,\n", + " column_name=\"lg_def_val_noarea\",\n", + ")\n", "\n", "gdf_lg_def.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/lg_def_ha.csv\")\n", "\n", @@ -2194,10 +2224,13 @@ "gdf_no_area_lg_def = get_h3_vector_statistics(\n", " \"../../datasets/raw/h3_raster_QA_calculations/preprocessed/def_area_kernnel.tif\",\n", " f\"{FILE_DIR}/satelligence data/AcehMills_indicators_50kmbuffer.shp\",\n", - " column='def_estimated_count',\n", - " resolution=6)\n", - "#save\n", - "gdf_no_area_lg_def.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/lg_def_no_area_ha_h3.csv\")\n", + " column=\"def_estimated_count\",\n", + " resolution=6,\n", + ")\n", + "# save\n", + "gdf_no_area_lg_def.to_csv(\n", + " \"../../datasets/raw/h3_raster_QA_calculations/statistics/lg_def_no_area_ha_h3.csv\"\n", + ")\n", "gdf_no_area_lg_def.head()" ] }, @@ -2295,14 +2328,18 @@ } ], "source": [ - "#get corrected data stadistics\n", - "\n", - "gdf_area_lg_def = 
get_zonal_stats_correction_factor(raster_path=\"../../datasets/raw/h3_raster_QA_calculations/preprocessed/def_area_kernnel.tif\",\n", - " corrected_area_gdf=area_h3_gdf,\n", - " resolution=6,\n", - " buffer_gdf=gdf_buffer50km,\n", - " formula=1)\n", - "gdf_area_lg_def.to_csv(\"../../datasets/raw/h3_raster_QA_calculations/statistics/lg_def_area_ha_h3.csv\")\n", + "# get corrected data stadistics\n", + "\n", + "gdf_area_lg_def = get_zonal_stats_correction_factor(\n", + " raster_path=\"../../datasets/raw/h3_raster_QA_calculations/preprocessed/def_area_kernnel.tif\",\n", + " corrected_area_gdf=area_h3_gdf,\n", + " resolution=6,\n", + " buffer_gdf=gdf_buffer50km,\n", + " formula=1,\n", + ")\n", + "gdf_area_lg_def.to_csv(\n", + " \"../../datasets/raw/h3_raster_QA_calculations/statistics/lg_def_area_ha_h3.csv\"\n", + ")\n", "\n", "gdf_area_lg_def.head()" ] diff --git a/data/notebooks/Lab/QA_h3_wr_data.ipynb b/data/notebooks/Lab/QA_h3_wr_data.ipynb index 4ada02676..c4ff9c83d 100644 --- a/data/notebooks/Lab/QA_h3_wr_data.ipynb +++ b/data/notebooks/Lab/QA_h3_wr_data.ipynb @@ -77,7 +77,7 @@ " env_key, _val = line.split(\"=\", 1)\n", " env_value = _val.split(\"\\n\")[0]\n", " env[env_key] = env_value\n", - " \n", + "\n", "list(env.keys())" ] }, @@ -88,12 +88,14 @@ "metadata": {}, "outputs": [], "source": [ - "postgres_thread_pool = ThreadedConnectionPool(1, 50,\n", - " host=env['API_POSTGRES_HOST'],\n", - " port=env['API_POSTGRES_PORT'],\n", - " user=env['API_POSTGRES_USERNAME'],\n", - " password=env['API_POSTGRES_PASSWORD']\n", - " )" + "postgres_thread_pool = ThreadedConnectionPool(\n", + " 1,\n", + " 50,\n", + " host=env[\"API_POSTGRES_HOST\"],\n", + " port=env[\"API_POSTGRES_PORT\"],\n", + " user=env[\"API_POSTGRES_USERNAME\"],\n", + " password=env[\"API_POSTGRES_PASSWORD\"],\n", + ")" ] }, { @@ -141,7 +143,7 @@ "cursor = conn.cursor()\n", "\n", "\n", - "## NOTE: The same logic for the and indicators, materials and admin regions would be applied to the supplier. 
\n", + "## NOTE: The same logic for the and indicators, materials and admin regions would be applied to the supplier.\n", "# As all the data is null, I'm not filtering by anything in this case\n", "cursor.execute(sql)\n", "\n", diff --git a/data/notebooks/Lab/QA_raster_to_h3_data.ipynb b/data/notebooks/Lab/QA_raster_to_h3_data.ipynb index 2200abf31..c97be3923 100644 --- a/data/notebooks/Lab/QA_raster_to_h3_data.ipynb +++ b/data/notebooks/Lab/QA_raster_to_h3_data.ipynb @@ -24,7 +24,7 @@ "metadata": {}, "outputs": [], "source": [ - "#Install if needed\n", + "# Install if needed\n", "#!pip install h3 --user\n", "#!pip install h3ronpy --user" ] @@ -36,20 +36,10 @@ "metadata": {}, "outputs": [], "source": [ - "#Import libraries\n", - "import h3\n", - "from h3ronpy import raster\n", + "# Import libraries\n", "import rasterio as rio\n", "import rasterio.plot\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from rasterstats import gen_zonal_stats, gen_point_query\n", - "from rasterstats import zonal_stats\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "import json\n", - "import os\n", - "from shapely.geometry import shape, mapping, box, Point, LinearRing, Polygon\n" + "from h3ronpy import raster" ] }, { @@ -59,9 +49,9 @@ "metadata": {}, "outputs": [], "source": [ - "prod_raster = '../../datasets/processed/h3_test/cotton_production_ind.tif'\n", + "prod_raster = \"../../datasets/processed/h3_test/cotton_production_ind.tif\"\n", "\n", - "test_area = (65,4,100,40)" + "test_area = (65, 4, 100, 40)" ] }, { @@ -111,7 +101,7 @@ } ], "source": [ - "#Check if raster (production) has all the info right\n", + "# Check if raster (production) has all the info right\n", "!gdalinfo $prod_raster" ] }, @@ -253,9 +243,15 @@ " transform = rio.windows.transform(window, src.transform)\n", " print(src.profile)\n", " rio.plot.show(src.read(window=window, masked=True))\n", - " gdf = raster.raster_to_geodataframe(src.read(1, window=window), transform, h3_resolution=4, nodata_value=int(src.profile['nodata']), compacted=False)\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1, window=window),\n", + " transform,\n", + " h3_resolution=4,\n", + " nodata_value=int(src.profile[\"nodata\"]),\n", + " compacted=False,\n", + " )\n", "\n", - "gdf.plot('value')\n", + "gdf.plot(\"value\")\n", "gdf.head()" ] }, @@ -274,11 +270,11 @@ "metadata": {}, "outputs": [], "source": [ - "src=rio.open(prod_raster)\n", + "src = rio.open(prod_raster)\n", "window = rio.windows.from_bounds(*test_area, src.transform)\n", "\n", - "array=src.read(window=window)\n", - "prod_df=array[0].ravel()\n", + "array = src.read(window=window)\n", + "prod_df = array[0].ravel()\n", "rst_m = round(prod_df[prod_df > 0].mean(), 2)\n", "rst_s = round(prod_df[prod_df > 0].std(), 2)" ] @@ -298,8 +294,8 @@ "metadata": {}, "outputs": [], "source": [ - "h3_m = round(gdf['value'].mean(), 2)\n", - "h3_s = round(gdf['value'].std(), 2)" + "h3_m = round(gdf[\"value\"].mean(), 2)\n", + "h3_s = round(gdf[\"value\"].std(), 2)" ] }, { @@ -318,8 +314,8 @@ } ], "source": [ - "print(f'Raster PRODUCTION mean value: {rst_m:.2f} std. dev.:{rst_s:.2f}')\n", - "print(f'H3 map PRODUCTION mean value: {h3_m} std. dev.:{h3_s}')" + "print(f\"Raster PRODUCTION mean value: {rst_m:.2f} std. dev.:{rst_s:.2f}\")\n", + "print(f\"H3 map PRODUCTION mean value: {h3_m} std. 
dev.:{h3_s}\")" ] }, { @@ -451,17 +447,23 @@ } ], "source": [ - "risk_raster = '../../datasets/processed/h3_test/wr_cotton_india.tif'\n", + "risk_raster = \"../../datasets/processed/h3_test/wr_cotton_india.tif\"\n", "\n", "with rio.open(risk_raster) as src:\n", " window = rio.windows.from_bounds(*test_area, src.transform)\n", " transform = rio.windows.transform(window, src.transform)\n", " print(src.profile)\n", " rio.plot.show(src.read(window=window, masked=True))\n", - " gdf = raster.raster_to_geodataframe(src.read(1, window=window), transform, h3_resolution=4, nodata_value=int(src.profile['nodata']), compacted=False)\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1, window=window),\n", + " transform,\n", + " h3_resolution=4,\n", + " nodata_value=int(src.profile[\"nodata\"]),\n", + " compacted=False,\n", + " )\n", "\n", - "gdf.plot('value')\n", - "#gdf['h3index'] = gdf['h3index'].apply(hex)\n", + "gdf.plot(\"value\")\n", + "# gdf['h3index'] = gdf['h3index'].apply(hex)\n", "gdf.head()" ] }, @@ -481,19 +483,19 @@ } ], "source": [ - "src=rio.open(risk_raster)\n", + "src = rio.open(risk_raster)\n", "window = rio.windows.from_bounds(*test_area, src.transform)\n", "\n", - "array=src.read(window=window)\n", - "risk_df=array[0].ravel()\n", + "array = src.read(window=window)\n", + "risk_df = array[0].ravel()\n", "rst_m = round(risk_df[risk_df > 0].mean(), 3)\n", "rst_s = round(risk_df[risk_df > 0].std(), 3)\n", "\n", - "h3_m = round(gdf['value'].mean(), 3)\n", - "h3_s = round(gdf['value'].std(), 3)\n", + "h3_m = round(gdf[\"value\"].mean(), 3)\n", + "h3_s = round(gdf[\"value\"].std(), 3)\n", "\n", - "print(f'Raster RISK mean value: {rst_m:.3f} std. dev.:{rst_s:.3f}')\n", - "print(f'H3 map RISK mean value: {h3_m} std. dev.:{h3_s}')" + "print(f\"Raster RISK mean value: {rst_m:.3f} std. dev.:{rst_s:.3f}\")\n", + "print(f\"H3 map RISK mean value: {h3_m} std. 
dev.:{h3_s}\")" ] }, { @@ -625,17 +627,23 @@ } ], "source": [ - "impact_raster = '../../datasets/processed/h3_test/water_impact_cotton_ind_m3yr.tif'\n", + "impact_raster = \"../../datasets/processed/h3_test/water_impact_cotton_ind_m3yr.tif\"\n", "\n", "with rio.open(impact_raster) as src:\n", " window = rio.windows.from_bounds(*test_area, src.transform)\n", " transform = rio.windows.transform(window, src.transform)\n", " print(src.profile)\n", " rio.plot.show(src.read(window=window, masked=True))\n", - " gdf = raster.raster_to_geodataframe(src.read(1, window=window), transform, h3_resolution=4, nodata_value=int(src.profile['nodata']), compacted=False)\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1, window=window),\n", + " transform,\n", + " h3_resolution=4,\n", + " nodata_value=int(src.profile[\"nodata\"]),\n", + " compacted=False,\n", + " )\n", "\n", - "gdf.plot('value')\n", - "#gdf['h3index'] = gdf['h3index'].apply(hex)\n", + "gdf.plot(\"value\")\n", + "# gdf['h3index'] = gdf['h3index'].apply(hex)\n", "gdf.head()" ] }, @@ -655,19 +663,19 @@ } ], "source": [ - "src=rio.open(impact_raster)\n", + "src = rio.open(impact_raster)\n", "window = rio.windows.from_bounds(*test_area, src.transform)\n", "\n", - "array=src.read(window=window)\n", - "impact_df=array[0].ravel()\n", + "array = src.read(window=window)\n", + "impact_df = array[0].ravel()\n", "rst_m = round(impact_df[impact_df > 0].mean(), 3)\n", "rst_s = round(impact_df[impact_df > 0].std(), 3)\n", "\n", - "h3_m = round(gdf['value'].mean(), 3)\n", - "h3_s = round(gdf['value'].std(), 3)\n", + "h3_m = round(gdf[\"value\"].mean(), 3)\n", + "h3_s = round(gdf[\"value\"].std(), 3)\n", "\n", - "print(f'Raster IMPACT mean value: {rst_m:.3f} std. dev.:{rst_s:.3f}')\n", - "print(f'H3 map IMPACT mean value: {h3_m} std. dev.:{h3_s}')\n" + "print(f\"Raster IMPACT mean value: {rst_m:.3f} std. dev.:{rst_s:.3f}\")\n", + "print(f\"H3 map IMPACT mean value: {h3_m} std. 
dev.:{h3_s}\")" ] }, { @@ -807,17 +815,23 @@ } ], "source": [ - "prod_raster = '../../datasets/processed/h3_test/cotton_production_ind.tif'\n", + "prod_raster = \"../../datasets/processed/h3_test/cotton_production_ind.tif\"\n", "with rio.open(prod_raster) as src:\n", " window = rio.windows.from_bounds(*test_area, src.transform)\n", " transform = rio.windows.transform(window, src.transform)\n", "\n", - " gdf = raster.raster_to_geodataframe(src.read(1, window=window), transform, h3_resolution=6, nodata_value=int(src.profile['nodata']), compacted=False)\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1, window=window),\n", + " transform,\n", + " h3_resolution=6,\n", + " nodata_value=int(src.profile[\"nodata\"]),\n", + " compacted=False,\n", + " )\n", "\n", "# Cast 'h3index' numeric value as hexadecimal value and set as index\n", - "gdf['h3index'] = gdf['h3index'].apply(lambda x: hex(x)[2:])\n", - "gdf.index = gdf['h3index']\n", - "gdf.drop(columns='h3index', inplace=True)\n", + "gdf[\"h3index\"] = gdf[\"h3index\"].apply(lambda x: hex(x)[2:])\n", + "gdf.index = gdf[\"h3index\"]\n", + "gdf.drop(columns=\"h3index\", inplace=True)\n", "gdf" ] }, @@ -836,7 +850,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf.to_file('../../datasets/processed/h3_test/cotton_production_ind.geojson', driver=\"GeoJSON\")" + "gdf.to_file(\"../../datasets/processed/h3_test/cotton_production_ind.geojson\", driver=\"GeoJSON\")" ] }, { @@ -854,7 +868,9 @@ "metadata": {}, "outputs": [], "source": [ - "gdf.drop('geometry', axis=1).to_csv('../../datasets/processed/h3_test/cotton_production_ind.csv', index=True)" + "gdf.drop(\"geometry\", axis=1).to_csv(\n", + " \"../../datasets/processed/h3_test/cotton_production_ind.csv\", index=True\n", + ")" ] }, { @@ -872,20 +888,28 @@ "metadata": {}, "outputs": [], "source": [ - "risk_raster = '../../datasets/processed/h3_test/wr_cotton_india.tif'\n", + "risk_raster = \"../../datasets/processed/h3_test/wr_cotton_india.tif\"\n", "with rio.open(risk_raster) as src:\n", " window = rio.windows.from_bounds(*test_area, src.transform)\n", " transform = rio.windows.transform(window, src.transform)\n", "\n", - " gdf = raster.raster_to_geodataframe(src.read(1, window=window), transform, h3_resolution=6, nodata_value=int(src.profile['nodata']), compacted=False)\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1, window=window),\n", + " transform,\n", + " h3_resolution=6,\n", + " nodata_value=int(src.profile[\"nodata\"]),\n", + " compacted=False,\n", + " )\n", "\n", "# Cast 'h3index' numeric value as hexadecimal value and set as index\n", - "gdf['h3index'] = gdf['h3index'].apply(lambda x: hex(x)[2:])\n", - "gdf.index = gdf['h3index']\n", - "gdf.drop(columns='h3index', inplace=True)\n", + "gdf[\"h3index\"] = gdf[\"h3index\"].apply(lambda x: hex(x)[2:])\n", + "gdf.index = gdf[\"h3index\"]\n", + "gdf.drop(columns=\"h3index\", inplace=True)\n", "\n", - "gdf.to_file('../../datasets/processed/h3_test/wr_cotton_india.geojson', driver=\"GeoJSON\")\n", - "gdf.drop('geometry', axis=1).to_csv('../../datasets/processed/h3_test/wr_cotton_india.csv', index=True)" + "gdf.to_file(\"../../datasets/processed/h3_test/wr_cotton_india.geojson\", driver=\"GeoJSON\")\n", + "gdf.drop(\"geometry\", axis=1).to_csv(\n", + " \"../../datasets/processed/h3_test/wr_cotton_india.csv\", index=True\n", + ")" ] }, { @@ -903,20 +927,30 @@ "metadata": {}, "outputs": [], "source": [ - "impact_raster = '../../datasets/processed/h3_test/water_impact_cotton_ind_m3yr.tif'\n", + "impact_raster = 
\"../../datasets/processed/h3_test/water_impact_cotton_ind_m3yr.tif\"\n", "with rio.open(impact_raster) as src:\n", " window = rio.windows.from_bounds(*test_area, src.transform)\n", " transform = rio.windows.transform(window, src.transform)\n", "\n", - " gdf = raster.raster_to_geodataframe(src.read(1, window=window), transform, h3_resolution=6, nodata_value=int(src.profile['nodata']), compacted=False)\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1, window=window),\n", + " transform,\n", + " h3_resolution=6,\n", + " nodata_value=int(src.profile[\"nodata\"]),\n", + " compacted=False,\n", + " )\n", "\n", "# Cast 'h3index' numeric value as hexadecimal value and set as index\n", - "gdf['h3index'] = gdf['h3index'].apply(lambda x: hex(x)[2:])\n", - "gdf.index = gdf['h3index']\n", - "gdf.drop(columns='h3index', inplace=True)\n", + "gdf[\"h3index\"] = gdf[\"h3index\"].apply(lambda x: hex(x)[2:])\n", + "gdf.index = gdf[\"h3index\"]\n", + "gdf.drop(columns=\"h3index\", inplace=True)\n", "\n", - "gdf.to_file('../../datasets/processed/h3_test/water_impact_cotton_ind_m3yr.geojson', driver=\"GeoJSON\")\n", - "gdf.drop('geometry', axis=1).to_csv('../../datasets/processed/h3_test/water_impact_cotton_ind_m3yr.csv', index=True)" + "gdf.to_file(\n", + " \"../../datasets/processed/h3_test/water_impact_cotton_ind_m3yr.geojson\", driver=\"GeoJSON\"\n", + ")\n", + "gdf.drop(\"geometry\", axis=1).to_csv(\n", + " \"../../datasets/processed/h3_test/water_impact_cotton_ind_m3yr.csv\", index=True\n", + ")" ] }, { diff --git a/data/notebooks/Lab/QA_risk_impact_calc_blwf.ipynb b/data/notebooks/Lab/QA_risk_impact_calc_blwf.ipynb index c4dc1fc3f..94c634cb1 100644 --- a/data/notebooks/Lab/QA_risk_impact_calc_blwf.ipynb +++ b/data/notebooks/Lab/QA_risk_impact_calc_blwf.ipynb @@ -30,16 +30,15 @@ "outputs": [], "source": [ "# import libraries\n", + "import time\n", + "\n", "import geopandas as gpd\n", + "import matplotlib.colors as colors\n", + "import matplotlib.pyplot as plt\n", + "import numpy\n", "import rasterio as rio\n", "import rasterio.plot\n", - "import matplotlib.pyplot as plt\n", "from matplotlib.colors import ListedColormap\n", - "import matplotlib.colors as colors\n", - "import numpy\n", - "\n", - "import time\n", - "\n", "from rasterstats import zonal_stats" ] }, @@ -50,8 +49,8 @@ "metadata": {}, "outputs": [], "source": [ - "#define path\n", - "path = '../../datasets/raw/wf/QA/'" + "# define path\n", + "path = \"../../datasets/raw/wf/QA/\"" ] }, { @@ -120,8 +119,8 @@ } ], "source": [ - "#import geometry:\n", - "geom = gpd.read_file(path+'gadm36_IND_0.shp')\n", + "# import geometry:\n", + "geom = gpd.read_file(path + \"gadm36_IND_0.shp\")\n", "geom.head()" ] }, @@ -258,14 +257,14 @@ "# Define a normalization from values -> colors\n", "norm = colors.BoundaryNorm([0, 10, 100, 1000, 7000], 5)\n", "\n", - "with rio.open(path + 'cotton/cotton_Production.tif') as src:\n", + "with rio.open(path + \"cotton/cotton_Production.tif\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((4,40))\n", - " ax.set_xlim((65,100))\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((4, 40))\n", + " ax.set_xlim((65, 100))\n", " rio.plot.show(dat, norm=norm, cmap=cmap, ax=ax, transform=src.transform)\n", - " geom.plot(ax=ax, color='red', alpha=.1, edgecolor='red')\n", - " ax.set_title('Production of cotton in India in tonnes')" + " geom.plot(ax=ax, color=\"red\", alpha=0.1, edgecolor=\"red\")\n", + " ax.set_title(\"Production of cotton in India in 
tonnes\")" ] }, { @@ -292,16 +291,16 @@ "cmap = ListedColormap([\"#ffffff\", \"#73b3d8\", \"#2879b9\", \"#08306b\"])\n", "\n", "# Define a normalization from values -> colors\n", - "norm = colors.BoundaryNorm([0,29584100, 863202440, 10063202440, 105581714153], 5)\n", + "norm = colors.BoundaryNorm([0, 29584100, 863202440, 10063202440, 105581714153], 5)\n", "\n", - "with rio.open(path + 'bl_wf_mmyr_area.tif') as src:\n", + "with rio.open(path + \"bl_wf_mmyr_area.tif\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((4,40))\n", - " ax.set_xlim((65,100))\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((4, 40))\n", + " ax.set_xlim((65, 100))\n", " rio.plot.show(dat, norm=norm, cmap=cmap, ax=ax, transform=src.transform)\n", - " geom.plot(ax=ax, color='red', alpha=.1, edgecolor='red')\n", - " ax.set_title('Production of cotton in India in tonnes')" + " geom.plot(ax=ax, color=\"red\", alpha=0.1, edgecolor=\"red\")\n", + " ax.set_title(\"Production of cotton in India in tonnes\")" ] }, { @@ -323,12 +322,12 @@ "source": [ "## obtain total production for cotton so we can normalise the water footprint\n", "src = rio.open(path + \"cotton/cotton_Production.tif\")\n", - "print('shape:',src.shape)\n", - "print('noData:',src.nodata)\n", + "print(\"shape:\", src.shape)\n", + "print(\"noData:\", src.nodata)\n", "\n", - "array= src.read()\n", + "array = src.read()\n", "\n", - "print(f'Total production of cotton: {array.sum()} tonnes')\n" + "print(f\"Total production of cotton: {array.sum()} tonnes\")" ] }, { @@ -347,7 +346,7 @@ } ], "source": [ - "#clip raster by extent:\n", + "# clip raster by extent:\n", "!gdal_translate -projwin -179.99166665 83.08834447 180.00836215 -55.91166665 -of GTiff $path\"cotton/cotton_Production.tif\" $path\"cotton/cotton_Production_extent.tif\"" ] }, @@ -368,8 +367,8 @@ } ], "source": [ - "#calculate risk:\n", - "#Remove production lower than 0 to avoid inconsistencies\n", + "# calculate risk:\n", + "# Remove production lower than 0 to avoid inconsistencies\n", "!gdal_calc.py --calc \"((A*0.001)/51100000)*(B>1)\" --format GTiff --type Float32 --NoDataValue 0.0 -A $path'bl_wf_mmyr_area.tif' --A_band 1 -B $path'cotton/cotton_Production_extent.tif' --outfile $path'wr_cotton.tif'" ] }, @@ -396,19 +395,19 @@ ], "source": [ "# Define the colors you want\n", - "cmap = ListedColormap([\"#ffffff\", \"#fed98e\", \"#fe9929\", \"#d95f0e\" ])\n", + "cmap = ListedColormap([\"#ffffff\", \"#fed98e\", \"#fe9929\", \"#d95f0e\"])\n", "\n", "# Define a normalization from values -> colors\n", - "norm = colors.BoundaryNorm([0,0.1, 1, 1.5, 2], 5)\n", + "norm = colors.BoundaryNorm([0, 0.1, 1, 1.5, 2], 5)\n", "\n", - "with rio.open(path + 'wr_cotton.tif') as src:\n", + "with rio.open(path + \"wr_cotton.tif\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((4,40))\n", - " ax.set_xlim((65,100))\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((4, 40))\n", + " ax.set_xlim((65, 100))\n", " rio.plot.show(dat, norm=norm, cmap=cmap, ax=ax, transform=src.transform)\n", - " geom.plot(ax=ax, color='red', alpha=.1, edgecolor='red')\n", - " ax.set_title('Water risk for cotton in m3/tonnes*year')" + " geom.plot(ax=ax, color=\"red\", alpha=0.1, edgecolor=\"red\")\n", + " ax.set_title(\"Water risk for cotton in m3/tonnes*year\")" ] }, { @@ -434,7 +433,7 @@ "metadata": {}, "outputs": [], "source": [ - "src = rio.open(path+'gadm36_IND_0.tif')" + "src = rio.open(path + 
\"gadm36_IND_0.tif\")" ] }, { @@ -474,7 +473,7 @@ } ], "source": [ - "#clip raster by extent:\n", + "# clip raster by extent:\n", "!gdal_translate -projwin -179.99166665 83.097781811 180.00836215000004 -55.902229309000006 -of GTiff $path\"cotton/cotton_HarvestedAreaHectares.tif\" $path\"cotton/cotton_HarvestedAreaHectares_extent.tif\"" ] }, @@ -495,7 +494,7 @@ } ], "source": [ - "#get harvest area total in india\n", + "# get harvest area total in india\n", "!gdal_calc.py --calc \"A*B\" --format GTiff --type Float32 --NoDataValue 0.0 -A $path'gadm36_IND_0.tif' --A_band 1 -B $path'cotton/cotton_HarvestedAreaHectares_extent.tif' --outfile $path'cotton_hahectares_ind_v3.tif'" ] }, @@ -516,7 +515,7 @@ } ], "source": [ - "#get production total in india\n", + "# get production total in india\n", "!gdal_calc.py --calc \"A*B\" --format GTiff --type Float32 --NoDataValue 0.0 -A $path'gadm36_IND_0.tif' --A_band 1 -B $path'cotton/cotton_Production_extent.tif' --outfile $path'cotton_production_ind.tif'" ] }, @@ -539,15 +538,15 @@ "source": [ "## obtain total production for cotton so we can normalise the water footprint\n", "src = rio.open(path + \"cotton_hahectares_ind_v3.tif\")\n", - "print('shape:',src.shape)\n", - "print('noData:',src.nodata)\n", + "print(\"shape:\", src.shape)\n", + "print(\"noData:\", src.nodata)\n", "\n", - "array= src.read()\n", - "#array = src.read(1)\n", - "#array_nd = numpy.ma.masked_array(array, mask=(array == src.nodata))\n", + "array = src.read()\n", + "# array = src.read(1)\n", + "# array_nd = numpy.ma.masked_array(array, mask=(array == src.nodata))\n", "#\n", "\n", - "print(f'Total harvest area of cotton: {array.sum()} hectares')" + "print(f\"Total harvest area of cotton: {array.sum()} hectares\")" ] }, { @@ -567,7 +566,7 @@ } ], "source": [ - "#get production total in india\n", + "# get production total in india\n", "!gdal_calc.py --calc \"(745/6973529)*(A)\" --format GTiff --type Float32 --NoDataValue 0.0 -A $path'cotton_hahectares_ind.tif' --A_band 1 --outfile $path'prob_purchase_area_IND_v3.tif'" ] }, @@ -590,15 +589,15 @@ "source": [ "## check tha the volume distributed is equal to the volume injested by the user.\n", "src = rio.open(path + \"prob_purchase_area_IND_v3.tif\")\n", - "print('shape:',src.shape)\n", - "print('noData:',src.nodata)\n", + "print(\"shape:\", src.shape)\n", + "print(\"noData:\", src.nodata)\n", "\n", "array = src.read()\n", - "#remove nans that appear outside boundary for extent\n", + "# remove nans that appear outside boundary for extent\n", "array_nonan = array[~numpy.isnan(array)]\n", "\n", "\n", - "print(f'Total harvest area of cotton: {round(array_nonan.sum())} tonnes')" + "print(f\"Total harvest area of cotton: {round(array_nonan.sum())} tonnes\")" ] }, { @@ -618,8 +617,8 @@ } ], "source": [ - "#calculate impact:\n", - "!gdal_calc.py --calc \"A*B\" --format GTiff --type Float32 --NoDataValue 0.0 -A $path'prob_purchase_area_IND_v3.tif' --A_band 1 -B $path'wr_cotton.tif' --outfile $path'water_impact_cotton_ind_m3yr.tif'\n" + "# calculate impact:\n", + "!gdal_calc.py --calc \"A*B\" --format GTiff --type Float32 --NoDataValue 0.0 -A $path'prob_purchase_area_IND_v3.tif' --A_band 1 -B $path'wr_cotton.tif' --outfile $path'water_impact_cotton_ind_m3yr.tif'" ] }, { @@ -643,19 +642,19 @@ ], "source": [ "# Define the colors you want\n", - "cmap = ListedColormap([\"#ffffff\", \"#fc9272\", \"#ef3b2c\", \"#a50f15\" ])\n", + "cmap = ListedColormap([\"#ffffff\", \"#fc9272\", \"#ef3b2c\", \"#a50f15\"])\n", "\n", "# Define a normalization from values -> 
colors\n", - "norm = colors.BoundaryNorm([0,0.025, 0.05, 0.075, 0.1], 5)\n", + "norm = colors.BoundaryNorm([0, 0.025, 0.05, 0.075, 0.1], 5)\n", "\n", - "with rio.open(path + 'water_impact_cotton_ind_m3yr.tif') as src:\n", + "with rio.open(path + \"water_impact_cotton_ind_m3yr.tif\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((4,40))\n", - " ax.set_xlim((65,100))\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((4, 40))\n", + " ax.set_xlim((65, 100))\n", " rio.plot.show(dat, norm=norm, cmap=cmap, ax=ax, transform=src.transform)\n", - " geom.plot(ax=ax, color='red', alpha=.1, edgecolor='red')\n", - " ax.set_title('Distribution of the water impact for 745 tonnes of cotton in India in m3*year')" + " geom.plot(ax=ax, color=\"red\", alpha=0.1, edgecolor=\"red\")\n", + " ax.set_title(\"Distribution of the water impact for 745 tonnes of cotton in India in m3*year\")" ] }, { @@ -677,15 +676,15 @@ "source": [ "## obtain totalimpact for cotton in india\n", "src = rio.open(path + \"water_impact_cotton_ind_m3yr.tif\")\n", - "print('shape:',src.shape)\n", - "print('noData:',src.nodata)\n", + "print(\"shape:\", src.shape)\n", + "print(\"noData:\", src.nodata)\n", "\n", "array = src.read()\n", - "#remove nans that appear outside boundary for extent\n", + "# remove nans that appear outside boundary for extent\n", "array_nonan = array[~numpy.isnan(array)]\n", "\n", "\n", - "print(f'Water imapct for cotton in India: {round(array_nonan.sum())} m3/yr')" + "print(f\"Water imapct for cotton in India: {round(array_nonan.sum())} m3/yr\")" ] }, { @@ -723,13 +722,10 @@ } ], "source": [ - "#zonal stats in india to get the sum of all fraction harvest area\n", + "# zonal stats in india to get the sum of all fraction harvest area\n", "wr = path + \"wr_cotton_india.tif\"\n", "start_time = time.time()\n", - "risk_zs = zonal_stats(\n", - " geom,\n", - " wr,\n", - " stats=\"mean\")\n", + "risk_zs = zonal_stats(geom, wr, stats=\"mean\")\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, @@ -772,7 +768,7 @@ } ], "source": [ - "risk_zs[0]['mean']*745" + "risk_zs[0][\"mean\"] * 745" ] }, { diff --git a/data/notebooks/Lab/QA_risk_impact_calc_blwf_xr.ipynb b/data/notebooks/Lab/QA_risk_impact_calc_blwf_xr.ipynb index 80f467aac..9e43b54c2 100644 --- a/data/notebooks/Lab/QA_risk_impact_calc_blwf_xr.ipynb +++ b/data/notebooks/Lab/QA_risk_impact_calc_blwf_xr.ipynb @@ -33,16 +33,13 @@ "outputs": [], "source": [ "# import libraries\n", - "import numpy as np\n", - "import xarray as xr\n", - "import rioxarray as rxr\n", - "from xrspatial.classify import natural_breaks\n", - "import geopandas as gpd\n", "import cartopy.crs as ccrs\n", - "\n", + "import geopandas as gpd\n", + "import matplotlib.colors as colors\n", "import matplotlib.pyplot as plt\n", - "from matplotlib.colors import ListedColormap\n", - "import matplotlib.colors as colors" + "import rioxarray as rxr\n", + "import xarray as xr\n", + "from matplotlib.colors import ListedColormap" ] }, { @@ -60,20 +57,20 @@ "metadata": {}, "outputs": [], "source": [ - "def da_plot(da, gdf, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40)):\n", + "def da_plot(da, gdf, color_list, values, title, x=\"x\", y=\"y\", xlim=(65, 100), ylim=(4, 40)):\n", " # Define the colors you want\n", " cmap = ListedColormap(color_list)\n", "\n", " # Define a normalization from values -> colors\n", " norm = colors.BoundaryNorm(values, len(color_list))\n", "\n", - " plt.figure(figsize=(12,10))\n", + " 
plt.figure(figsize=(12, 10))\n", " ax = plt.axes(projection=ccrs.PlateCarree())\n", "\n", " ax.set_global()\n", "\n", " da.plot(ax=ax, norm=norm, cmap=cmap, x=x, y=y, transform=ccrs.PlateCarree())\n", - " gdf.plot(ax=ax, color='red', alpha=.1, edgecolor='red')\n", + " gdf.plot(ax=ax, color=\"red\", alpha=0.1, edgecolor=\"red\")\n", " ax.coastlines()\n", " ax.set_xlim(xlim)\n", " ax.set_ylim(ylim)\n", @@ -97,8 +94,8 @@ "metadata": {}, "outputs": [], "source": [ - "#define path\n", - "path = '../../datasets/raw/water/QA/'" + "# define path\n", + "path = \"../../datasets/raw/water/QA/\"" ] }, { @@ -155,8 +152,8 @@ } ], "source": [ - "#import geometry:\n", - "geom = gpd.read_file(path+'gadm36_IND_0.shp')\n", + "# import geometry:\n", + "geom = gpd.read_file(path + \"gadm36_IND_0.shp\")\n", "geom.head()" ] }, @@ -572,9 +569,9 @@ } ], "source": [ - "xda = rxr.open_rasterio(path+'/bl_wf_mmyr_area.tif').squeeze().drop(\"band\")\n", - "# convert to Dataset \n", - "xds_wf = xr.Dataset({'water_footprint': xda}, attrs=xda.attrs)\n", + "xda = rxr.open_rasterio(path + \"/bl_wf_mmyr_area.tif\").squeeze().drop(\"band\")\n", + "# convert to Dataset\n", + "xds_wf = xr.Dataset({\"water_footprint\": xda}, attrs=xda.attrs)\n", "xds_wf" ] }, @@ -599,10 +596,20 @@ ], "source": [ "color_list = [\"#ffffff\", \"#73b3d8\", \"#2879b9\", \"#08306b\"]\n", - "values = [0, 29584100, 863202440, 10063202440, 105581714153]\n", - "title = 'Water footprint (mm/year)'\n", + "values = [0, 29584100, 863202440, 10063202440, 105581714153]\n", + "title = \"Water footprint (mm/year)\"\n", "\n", - "da_plot(xds_wf['water_footprint'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds_wf[\"water_footprint\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -1007,9 +1014,9 @@ } ], "source": [ - "xda = rxr.open_rasterio(path+'/cotton/cotton_Production.tif').squeeze().drop(\"band\")\n", - "# convert to Dataset \n", - "xds_cp = xr.Dataset({'cotton_production': xda}, attrs=xda.attrs)\n", + "xda = rxr.open_rasterio(path + \"/cotton/cotton_Production.tif\").squeeze().drop(\"band\")\n", + "# convert to Dataset\n", + "xds_cp = xr.Dataset({\"cotton_production\": xda}, attrs=xda.attrs)\n", "xds_cp" ] }, @@ -1036,9 +1043,19 @@ "color_list = [\"#ffffff\", \"#7bc87c\", \"#2a924a\", \"#00441b\"]\n", "values = [0, 100, 1000, 5000, 10000]\n", "\n", - "title = 'Cotton production (tons)'\n", + "title = \"Cotton production (tons)\"\n", "\n", - "da_plot(xds_cp['cotton_production'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds_cp[\"cotton_production\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -1443,9 +1460,9 @@ } ], "source": [ - "xda = rxr.open_rasterio(path+'/cotton/cotton_HarvestedAreaHectares.tif').squeeze().drop(\"band\")\n", - "# convert to Dataset \n", - "xds_cha = xr.Dataset({'cotton_harvested_area': xda}, attrs=xda.attrs)\n", + "xda = rxr.open_rasterio(path + \"/cotton/cotton_HarvestedAreaHectares.tif\").squeeze().drop(\"band\")\n", + "# convert to Dataset\n", + "xds_cha = xr.Dataset({\"cotton_harvested_area\": xda}, attrs=xda.attrs)\n", "xds_cha" ] }, @@ -1471,9 +1488,19 @@ "source": [ "color_list = [\"#ffffff\", \"#7bc87c\", \"#2a924a\", \"#00441b\"]\n", "values = [0, 10, 100, 1000, 7000]\n", - "title = 
'Cotton harvested area (hectares)'\n", + "title = \"Cotton harvested area (hectares)\"\n", "\n", - "da_plot(xds_cha['cotton_harvested_area'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds_cha[\"cotton_harvested_area\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -1913,8 +1940,10 @@ } ], "source": [ - "variables = {\"cotton_production\": \"cotton_Production\",\n", - "\"cotton_harvested_area\": \"cotton_HarvestedAreaHectares\"}\n", + "variables = {\n", + " \"cotton_production\": \"cotton_Production\",\n", + " \"cotton_harvested_area\": \"cotton_HarvestedAreaHectares\",\n", + "}\n", "\n", "xds = xds_wf.copy()\n", "\n", @@ -1923,8 +1952,8 @@ " !gdal_translate -projwin -179.99166664999998 83.08834447000001 180.00836215000007 -55.91166665 -of GTiff $path\"cotton/{file_name}.tif\" $path\"cotton/{file_name}_wf_box.tif\"\n", "\n", " # Create dataset\n", - " xda = rxr.open_rasterio(path+f'/cotton/{file_name}_wf_box.tif').squeeze().drop(\"band\")\n", - " # convert to Dataset \n", + " xda = rxr.open_rasterio(path + f\"/cotton/{file_name}_wf_box.tif\").squeeze().drop(\"band\")\n", + " # convert to Dataset\n", " xds_tmp = xr.Dataset({variable: xda}, attrs=xda.attrs)\n", "\n", " # Assign water footprint coords\n", @@ -1934,7 +1963,7 @@ " # Add variable to water footprint dataset\n", " xds[variable] = xds_tmp[variable]\n", "\n", - "xds " + "xds" ] }, { @@ -1971,7 +2000,7 @@ ], "source": [ "tot_pro = xds[\"cotton_production\"].sum().data\n", - "print(f'Total production of cotton: {tot_pro} tonnes')" + "print(f\"Total production of cotton: {tot_pro} tonnes\")" ] }, { @@ -1985,7 +2014,7 @@ "\n", "xds_risk = xds.where(xds.cotton_production > 0).copy()\n", "\n", - "xds = xds.assign(water_risk = xds_risk.water_footprint*mm_to_m3/tot_pro)" + "xds = xds.assign(water_risk=xds_risk.water_footprint * mm_to_m3 / tot_pro)" ] }, { @@ -2008,12 +2037,14 @@ } ], "source": [ - "color_list = [\"#ffffff\", \"#fed98e\", \"#fe9929\", \"#d95f0e\" ]\n", - "values = [0,0.1, 1, 1.5, 2]\n", + "color_list = [\"#ffffff\", \"#fed98e\", \"#fe9929\", \"#d95f0e\"]\n", + "values = [0, 0.1, 1, 1.5, 2]\n", "\n", - "title = 'Water risk for cotton (m3/year * tons)'\n", + "title = \"Water risk for cotton (m3/year * tons)\"\n", "\n", - "da_plot(xds[\"water_risk\"], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds[\"water_risk\"], geom, color_list, values, title, x=\"x\", y=\"y\", xlim=(65, 100), ylim=(4, 40)\n", + ")" ] }, { @@ -2496,9 +2527,19 @@ "color_list = [\"#ffffff\", \"#7bc87c\", \"#2a924a\", \"#00441b\"]\n", "values = [0, 10, 100, 1000, 7000]\n", "\n", - "title = 'Cotton harvested area in India (hectares)'\n", + "title = \"Cotton harvested area in India (hectares)\"\n", "\n", - "da_plot(xds_ind['cotton_harvested_area'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds_ind[\"cotton_harvested_area\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -2525,7 +2566,7 @@ ], "source": [ "tot_pro_ind = xds_ind[\"cotton_production\"].sum().data\n", - "print(f'Total production of cotton in India: {tot_pro_ind } tons')" + "print(f\"Total production of cotton in India: {tot_pro_ind } tons\")" ] }, { @@ -2552,7 +2593,7 @@ ], "source": [ "tot_ha = 
xds_ind[\"cotton_harvested_area\"].sum().data\n", - "print(f'Total harvest area of cotton in India: {tot_ha} hectares')" + "print(f\"Total harvest area of cotton in India: {tot_ha} hectares\")" ] }, { @@ -2583,8 +2624,8 @@ ], "source": [ "volume = 745\n", - "mean_risk = xds_ind['water_risk'].mean().data\n", - "print(f'Total water impact for cotton in India:: {mean_risk * volume} m3/yr')" + "mean_risk = xds_ind[\"water_risk\"].mean().data\n", + "print(f\"Total water impact for cotton in India:: {mean_risk * volume} m3/yr\")" ] }, { @@ -2608,7 +2649,9 @@ "metadata": {}, "outputs": [], "source": [ - "xds_ind = xds_ind.assign(probability_purchase_area = (volume/tot_ha)* xds_ind[\"cotton_harvested_area\"])" + "xds_ind = xds_ind.assign(\n", + " probability_purchase_area=(volume / tot_ha) * xds_ind[\"cotton_harvested_area\"]\n", + ")" ] }, { @@ -2634,9 +2677,19 @@ "color_list = [\"#ffffff\", \"#a2b9bc\", \"#878f99\", \"#b2ad7f\", \"#6b5b95\"]\n", "values = [0, 0.005, 0.01, 0.025, 0.05, 0.1]\n", "\n", - "title = 'Probability purchase area (tons)'\n", + "title = \"Probability purchase area (tons)\"\n", "\n", - "da_plot(xds_ind['probability_purchase_area'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds_ind[\"probability_purchase_area\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -2663,7 +2716,7 @@ ], "source": [ "tot_ha = xds_ind[\"probability_purchase_area\"].sum().data\n", - "print(f'Total distrivuted volume of cottom in India: {tot_ha} tons')" + "print(f\"Total distrivuted volume of cottom in India: {tot_ha} tons\")" ] }, { @@ -2683,7 +2736,7 @@ "metadata": {}, "outputs": [], "source": [ - "xds_ind = xds_ind.assign(water_impact = xds_ind['water_risk'] * xds_ind['probability_purchase_area'])" + "xds_ind = xds_ind.assign(water_impact=xds_ind[\"water_risk\"] * xds_ind[\"probability_purchase_area\"])" ] }, { @@ -2706,12 +2759,22 @@ } ], "source": [ - "color_list = [\"#ffffff\", \"#fc9272\", \"#ef3b2c\", \"#a50f15\" ]\n", - "values = [0,0.025, 0.05, 0.075, 0.1]\n", + "color_list = [\"#ffffff\", \"#fc9272\", \"#ef3b2c\", \"#a50f15\"]\n", + "values = [0, 0.025, 0.05, 0.075, 0.1]\n", "\n", - "title = 'Distribution of the water impact for 745 tonnes of cotton in India (m3/year)'\n", + "title = \"Distribution of the water impact for 745 tonnes of cotton in India (m3/year)\"\n", "\n", - "da_plot(xds_ind['water_impact'], geom, color_list, values, title, x='x', y='y', xlim=(65,100), ylim=(4,40))" + "da_plot(\n", + " xds_ind[\"water_impact\"],\n", + " geom,\n", + " color_list,\n", + " values,\n", + " title,\n", + " x=\"x\",\n", + " y=\"y\",\n", + " xlim=(65, 100),\n", + " ylim=(4, 40),\n", + ")" ] }, { @@ -2737,8 +2800,8 @@ } ], "source": [ - "tot_impact = xds_ind['water_impact'].sum().data\n", - "print(f'Total water impact for cotton in India:: {tot_impact} m3/yr')" + "tot_impact = xds_ind[\"water_impact\"].sum().data\n", + "print(f\"Total water impact for cotton in India:: {tot_impact} m3/yr\")" ] }, { @@ -2764,8 +2827,16 @@ } ], "source": [ - "tot_impact_all = (((xds_ind[\"water_footprint\"]*mm_to_m3)/tot_pro) * (xds_ind[\"cotton_harvested_area\"]/tot_ha) * tot_pro_ind).sum().data\n", - "print(f'Total water impact for cotton in India:: {tot_impact_all} m3/yr')" + "tot_impact_all = (\n", + " (\n", + " ((xds_ind[\"water_footprint\"] * mm_to_m3) / tot_pro)\n", + " * (xds_ind[\"cotton_harvested_area\"] / tot_ha)\n", + " * tot_pro_ind\n", 
+ " )\n", + " .sum()\n", + " .data\n", + ")\n", + "print(f\"Total water impact for cotton in India:: {tot_impact_all} m3/yr\")" ] }, { diff --git a/data/notebooks/Lab/QA_vector_to_h3_data.ipynb b/data/notebooks/Lab/QA_vector_to_h3_data.ipynb index cc66f7025..35fd5813c 100644 --- a/data/notebooks/Lab/QA_vector_to_h3_data.ipynb +++ b/data/notebooks/Lab/QA_vector_to_h3_data.ipynb @@ -8,24 +8,15 @@ "outputs": [], "source": [ "# Import libraries\n", - "import json\n", - "import os\n", - "from pathlib import Path#\n", + "from pathlib import Path\n", "\n", + "import fiona\n", "import geopandas as gpd\n", "import h3\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", "import pandas as pd\n", - "import psycopg2\n", - "import rasterio as rio\n", - "import rasterio.plot\n", - "from h3ronpy import raster, util, vector\n", + "from h3ronpy import util, vector\n", "from psycopg2.pool import ThreadedConnectionPool\n", - "from rasterstats import gen_point_query, gen_zonal_stats, zonal_stats\n", - "from shapely.geometry import LinearRing, Point, Polygon, box, mapping, shape\n", - "from sqlalchemy import create_engine\n", - "import fiona" + "from shapely.geometry import Polygon" ] }, { @@ -62,9 +53,11 @@ "metadata": {}, "outputs": [], "source": [ - "def get_h3_for_region(col: str, table: str, region: str, ps_thread_pool=postgres_thread_pool) -> pd.DataFrame:\n", + "def get_h3_for_region(\n", + " col: str, table: str, region: str, ps_thread_pool=postgres_thread_pool\n", + ") -> pd.DataFrame:\n", " conn = postgres_thread_pool.getconn()\n", - " cursor = conn.cursor()\n", + " conn.cursor()\n", " query = f\"\"\"\n", " select {col} from {table} df \n", " where df.h3index in \n", @@ -374,7 +367,7 @@ } ], "source": [ - "dupes[dupes.h3index==607576025534038015]" + "dupes[dupes.h3index == 607576025534038015]" ] }, { @@ -403,7 +396,10 @@ } ], "source": [ - "gdf.to_file('/home/biel/Vizzuality/lg_data/Y2019M07D12_Aqueduct30_V01/baseline/annual/duplicated_h3.geojson', driver='GeoJSON')" + "gdf.to_file(\n", + " \"/home/biel/Vizzuality/lg_data/Y2019M07D12_Aqueduct30_V01/baseline/annual/duplicated_h3.geojson\",\n", + " driver=\"GeoJSON\",\n", + ")" ] }, { @@ -433,7 +429,6 @@ "metadata": {}, "outputs": [], "source": [ - "import h3pandas\n", "buggy_df = gpd.read_file(aqueduct_file)\n", "buggy_df = buggy_df[[\"bws_cat\", \"aq30_id\", \"pfaf_id\", \"string_id\", \"geometry\"]]" ] @@ -773,7 +768,7 @@ "metadata": {}, "outputs": [], "source": [ - "cols = [\"bws_cat\", \"aq30_id\", \"aqid\",\"pfaf_id\", \"string_id\"]\n", + "cols = [\"bws_cat\", \"aq30_id\", \"aqid\", \"pfaf_id\", \"string_id\"]\n", "df = gpd.GeoDataFrame.from_features(records(aqueduct_file, cols)).set_crs(\"EPSG:4326\")" ] }, @@ -1010,7 +1005,9 @@ } ], "source": [ - "df = get_h3_for_region(\"bws_cat, h3index\", \"h3_grid_aqueduct_global\", \"'India'\", postgres_thread_pool)" + "df = get_h3_for_region(\n", + " \"bws_cat, h3index\", \"h3_grid_aqueduct_global\", \"'India'\", postgres_thread_pool\n", + ")" ] }, { @@ -1171,7 +1168,7 @@ } ], "source": [ - "hdf.plot(column=\"bws_cat\", figsize=(12,12))" + "hdf.plot(column=\"bws_cat\", figsize=(12, 12))" ] }, { @@ -1190,7 +1187,10 @@ } ], "source": [ - "hdf.to_file('/home/biel/Vizzuality/lg_data/Y2019M07D12_Aqueduct30_V01/baseline/annual/india_h3.geojson', driver='GeoJSON')" + "hdf.to_file(\n", + " \"/home/biel/Vizzuality/lg_data/Y2019M07D12_Aqueduct30_V01/baseline/annual/india_h3.geojson\",\n", + " driver=\"GeoJSON\",\n", + ")" ] }, { diff --git a/data/notebooks/Lab/QA_waterFoorprint_calc.ipynb 
b/data/notebooks/Lab/QA_waterFoorprint_calc.ipynb index ea073c303..3ee81a15f 100644 --- a/data/notebooks/Lab/QA_waterFoorprint_calc.ipynb +++ b/data/notebooks/Lab/QA_waterFoorprint_calc.ipynb @@ -7,15 +7,13 @@ "metadata": {}, "outputs": [], "source": [ - "import rasterio as rio\n", - "import rasterio.plot\n", "import geopandas as gpd\n", - "import matplotlib.pyplot as plt\n", - "from matplotlib.colors import ListedColormap\n", "import matplotlib.colors as colors\n", + "import matplotlib.pyplot as plt\n", "import numpy\n", - "\n", - "from rasterstats import zonal_stats" + "import rasterio as rio\n", + "import rasterio.plot\n", + "from matplotlib.colors import ListedColormap" ] }, { @@ -25,7 +23,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = '../../datasets/raw/wf/QA/'" + "path = \"../../datasets/raw/wf/QA/\"" ] }, { @@ -82,7 +80,7 @@ } ], "source": [ - "gadm_usa = gpd.read_file(path+'gadm36_USA_0.shp')\n", + "gadm_usa = gpd.read_file(path + \"gadm36_USA_0.shp\")\n", "gadm_usa.head()" ] }, @@ -130,14 +128,14 @@ "# Define a normalization from values -> colors\n", "norm = colors.BoundaryNorm([0, 214, 429, 644, 858], 5)\n", "\n", - "with rio.open(path + 'wf_tot_mmyr/hdr.adf') as src:\n", + "with rio.open(path + \"wf_tot_mmyr/hdr.adf\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((20,60))\n", - " ax.set_xlim((-130,-60))\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((20, 60))\n", + " ax.set_xlim((-130, -60))\n", " rio.plot.show(dat, norm=norm, cmap=cmap, ax=ax, transform=src.transform)\n", - " gadm_usa.plot(ax=ax, color='red', alpha=.1, edgecolor='red')\n", - " ax.set_title('Total water footprint mm/yr')" + " gadm_usa.plot(ax=ax, color=\"red\", alpha=0.1, edgecolor=\"red\")\n", + " ax.set_title(\"Total water footprint mm/yr\")" ] }, { @@ -212,14 +210,14 @@ ], "source": [ "src = rio.open(path + \"wf_tot_mmyr/hdr.adf\")\n", - "print('shape:',src.shape)\n", - "print('noData:',src.nodata)\n", + "print(\"shape:\", src.shape)\n", + "print(\"noData:\", src.nodata)\n", "\n", "image_read = src.read(1)\n", "src_masked = numpy.ma.masked_array(image_read, mask=(image_read == src.nodata))\n", "\n", - "print(f'GLOBAL Total sum excluding no data: {src_masked.sum()} mm/yr')\n", - "print(f'GLOBAL Total sum excluding no data: {src_masked.sum()*0.001} m3/yr')" + "print(f\"GLOBAL Total sum excluding no data: {src_masked.sum()} mm/yr\")\n", + "print(f\"GLOBAL Total sum excluding no data: {src_masked.sum()*0.001} m3/yr\")" ] }, { @@ -237,7 +235,7 @@ } ], "source": [ - "#rasterise the gadm geometry\n", + "# rasterise the gadm geometry\n", "\n", "!gdal_rasterize -l gadm36_USA_0 -burn 1.0 -tr 0.08333334 0.08333334 -a_nodata 0.0 -te -179.99166665 -55.902229309 180.00836215 83.097781811 -ot Float32 -of GTiff $path'gadm36_USA_0.shp' $path'gadm36_USA_0.tif'" ] @@ -471,14 +469,14 @@ "# Define a normalization from values -> colors\n", "norm = colors.BoundaryNorm([0, 214, 429, 644, 858], 5)\n", "\n", - "with rio.open(path + 'usa_tot_wf.tif') as src:\n", + "with rio.open(path + \"usa_tot_wf.tif\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((20,60))\n", - " ax.set_xlim((-130,-60))\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((20, 60))\n", + " ax.set_xlim((-130, -60))\n", " rio.plot.show(dat, norm=norm, cmap=cmap, ax=ax, transform=src.transform)\n", - " gadm_usa.plot(ax=ax, color='red', alpha=.1, edgecolor='red')\n", - " ax.set_title('Total Water footprint mm/yr 
in USA ')" + " gadm_usa.plot(ax=ax, color=\"red\", alpha=0.1, edgecolor=\"red\")\n", + " ax.set_title(\"Total Water footprint mm/yr in USA \")" ] }, { @@ -530,7 +528,7 @@ } ], "source": [ - "#explore output for no data\n", + "# explore output for no data\n", "!gdalinfo $path'usa_tot_wf.tif'" ] }, @@ -553,14 +551,14 @@ ], "source": [ "src_usa = rio.open(path + \"usa_tot_wf.tif\")\n", - "print('shape:',src_usa.shape)\n", - "print('noData:',src_usa.nodata)\n", + "print(\"shape:\", src_usa.shape)\n", + "print(\"noData:\", src_usa.nodata)\n", "\n", "image_read_usa = src_usa.read(1)\n", "src_masked_usa = numpy.ma.masked_array(image_read_usa, mask=(image_read == src_usa.nodata))\n", "\n", - "print(f'GLOBAL Total sum excluding no data: {src_masked_usa.sum()} mm/yr')\n", - "print(f'GLOBAL Total sum excluding no data: {src_masked_usa.sum()*0.001} m3/yr')" + "print(f\"GLOBAL Total sum excluding no data: {src_masked_usa.sum()} mm/yr\")\n", + "print(f\"GLOBAL Total sum excluding no data: {src_masked_usa.sum()*0.001} m3/yr\")" ] }, { @@ -643,7 +641,7 @@ } ], "source": [ - "#explore output for no data\n", + "# explore output for no data\n", "!gdalinfo $path'usa_tot_wf_area.tif'" ] }, @@ -673,14 +671,14 @@ "# Define a normalization from values -> colors\n", "norm = colors.BoundaryNorm([10093132, 23580922297, 47151751462, 70722580627, 94293409792], 5)\n", "\n", - "with rio.open(path + 'usa_tot_wf_area.tif') as src:\n", + "with rio.open(path + \"usa_tot_wf_area.tif\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((20,60))\n", - " ax.set_xlim((-130,-60))\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((20, 60))\n", + " ax.set_xlim((-130, -60))\n", " rio.plot.show(dat, norm=norm, cmap=cmap, ax=ax, transform=src.transform)\n", - " gadm_usa.plot(ax=ax, color='red', alpha=.1, edgecolor='red')\n", - " ax.set_title('Total Water footprint mm/yr * aream2 in USA ')" + " gadm_usa.plot(ax=ax, color=\"red\", alpha=0.1, edgecolor=\"red\")\n", + " ax.set_title(\"Total Water footprint mm/yr * aream2 in USA \")" ] }, { @@ -702,14 +700,16 @@ ], "source": [ "src_usa_area = rio.open(path + \"usa_tot_wf_area.tif\")\n", - "print('shape:',src_usa_area.shape)\n", - "print('noData:',src_usa_area.nodata)\n", + "print(\"shape:\", src_usa_area.shape)\n", + "print(\"noData:\", src_usa_area.nodata)\n", "\n", "image_read_usa_area = src_usa_area.read(1)\n", - "src_masked_usa_area = numpy.ma.masked_array(image_read_usa_area, mask=(image_read == src_usa_area.nodata))\n", + "src_masked_usa_area = numpy.ma.masked_array(\n", + " image_read_usa_area, mask=(image_read == src_usa_area.nodata)\n", + ")\n", "\n", - "print(f'GLOBAL Total sum excluding no data: {src_masked_usa_area.sum()} mm/yr')\n", - "print(f'GLOBAL Total sum excluding no data: {src_masked_usa_area.sum()*0.001/1000000000} m3/yr')" + "print(f\"GLOBAL Total sum excluding no data: {src_masked_usa_area.sum()} mm/yr\")\n", + "print(f\"GLOBAL Total sum excluding no data: {src_masked_usa_area.sum()*0.001/1000000000} m3/yr\")" ] }, { @@ -736,16 +736,16 @@ } ], "source": [ - "#compute difference between calculated value and value provided in the paper.\n", + "# compute difference between calculated value and value provided in the paper.\n", "\n", - "value = 1053 \n", + "value = 1053\n", "value_es = round(1367.447391174656)\n", "\n", "difference_abs = value - value_es\n", - "print(f'absolute difference: {difference_abs}')\n", + "print(f\"absolute difference: {difference_abs}\")\n", "\n", - "difference_rel = 
(value-value_es)/value\n", - "print(f'relative difference: {difference_rel}')" + "difference_rel = (value - value_es) / value\n", + "print(f\"relative difference: {difference_rel}\")" ] }, { @@ -820,7 +820,7 @@ } ], "source": [ - "gadm_ind = gpd.read_file(path+'gadm36_IND_0.shp')\n", + "gadm_ind = gpd.read_file(path + \"gadm36_IND_0.shp\")\n", "gadm_ind.head()" ] }, @@ -850,14 +850,14 @@ "# Define a normalization from values -> colors\n", "norm = colors.BoundaryNorm([0, 214, 429, 644, 858], 5)\n", "\n", - "with rio.open(path + 'wf_tot_mmyr/hdr.adf') as src:\n", + "with rio.open(path + \"wf_tot_mmyr/hdr.adf\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((4,40))\n", - " ax.set_xlim((65,100))\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((4, 40))\n", + " ax.set_xlim((65, 100))\n", " rio.plot.show(dat, norm=norm, cmap=cmap, ax=ax, transform=src.transform)\n", - " gadm_ind.plot(ax=ax, color='red', alpha=.1, edgecolor='red')\n", - " ax.set_title('Total water footprint mm/yr')" + " gadm_ind.plot(ax=ax, color=\"red\", alpha=0.1, edgecolor=\"red\")\n", + " ax.set_title(\"Total water footprint mm/yr\")" ] }, { @@ -1035,7 +1035,7 @@ } ], "source": [ - "#multiply by wft\n", + "# multiply by wft\n", "!gdal_calc.py --calc \"A*B\" --format GTiff --type Float32 --NoDataValue 0.0 -A $path'gadm36_IND_0.tif' --A_band 1 -B $path'tot_wf_mmyr_area.tif' --outfile $path'IND_tot_wf_mmyr_area.tif'" ] }, @@ -1117,14 +1117,14 @@ "# Define a normalization from values -> colors\n", "norm = colors.BoundaryNorm([10093132, 23580922297, 47151751462, 70722580627, 94293409792], 5)\n", "\n", - "with rio.open(path + 'IND_tot_wf_mmyr_area.tif') as src:\n", + "with rio.open(path + \"IND_tot_wf_mmyr_area.tif\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((4,40))\n", - " ax.set_xlim((65,100))\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((4, 40))\n", + " ax.set_xlim((65, 100))\n", " rio.plot.show(dat, norm=norm, cmap=cmap, ax=ax, transform=src.transform)\n", - " gadm_ind.plot(ax=ax, color='red', alpha=.1, edgecolor='red')\n", - " ax.set_title('Total water footprint mm/yr')" + " gadm_ind.plot(ax=ax, color=\"red\", alpha=0.1, edgecolor=\"red\")\n", + " ax.set_title(\"Total water footprint mm/yr\")" ] }, { @@ -1146,15 +1146,17 @@ ], "source": [ "src_india_area = rio.open(path + \"IND_tot_wf_mmyr_area.tif\")\n", - "print('shape:',src_india_area.shape)\n", - "print('noData:',src_india_area.nodata)\n", + "print(\"shape:\", src_india_area.shape)\n", + "print(\"noData:\", src_india_area.nodata)\n", "\n", "src_india_area_array = src_india_area.read()\n", - "#remove nans that appear outside boundary for extent\n", + "# remove nans that appear outside boundary for extent\n", "src_india_area_array_nonan = src_india_area_array[~numpy.isnan(src_india_area_array)]\n", "\n", - "print(f'GLOBAL Total sum excluding no data: {src_india_area_array_nonan.sum()} mm/yr')\n", - "print(f'GLOBAL Total sum excluding no data: {src_india_area_array_nonan.sum()*0.001/1000000000} m3/yr')" + "print(f\"GLOBAL Total sum excluding no data: {src_india_area_array_nonan.sum()} mm/yr\")\n", + "print(\n", + " f\"GLOBAL Total sum excluding no data: {src_india_area_array_nonan.sum()*0.001/1000000000} m3/yr\"\n", + ")" ] }, { @@ -1173,15 +1175,15 @@ } ], "source": [ - "#compute difference\n", - "value = 1182 \n", + "# compute difference\n", + "value = 1182\n", "value_es = round(1406.779091058688)\n", "\n", 
"difference_abs = value - value_es\n", - "print(f'absolute difference: {difference_abs}')\n", + "print(f\"absolute difference: {difference_abs}\")\n", "\n", - "difference_rel = (value-value_es)/value\n", - "print(f'relative difference: {difference_rel}')" + "difference_rel = (value - value_es) / value\n", + "print(f\"relative difference: {difference_rel}\")" ] }, { @@ -1222,22 +1224,20 @@ } ], "source": [ - "\n", - "\n", "# Define the colors you want\n", "cmap = ListedColormap([\"#ffffff\", \"#73b3d8\", \"#2879b9\", \"#08306b\"])\n", "\n", "# Define a normalization from values -> colors\n", "norm = colors.BoundaryNorm([0, 10, 50, 115, 150], 5)\n", "\n", - "with rio.open(path + 'wf_bltot_mmyr/hdr.adf') as src:\n", + "with rio.open(path + \"wf_bltot_mmyr/hdr.adf\") as src:\n", " dat = src.read(1)\n", - " fig, ax = plt.subplots(figsize=[15,10])\n", - " ax.set_ylim((4,40))\n", - " ax.set_xlim((65,100))\n", + " fig, ax = plt.subplots(figsize=[15, 10])\n", + " ax.set_ylim((4, 40))\n", + " ax.set_xlim((65, 100))\n", " rio.plot.show(dat, norm=norm, cmap=cmap, ax=ax, transform=src.transform)\n", - " gadm_ind.plot(ax=ax, color='red', alpha=.1, edgecolor='red')\n", - " ax.set_title('Blue water footprint mm/yr')" + " gadm_ind.plot(ax=ax, color=\"red\", alpha=0.1, edgecolor=\"red\")\n", + " ax.set_title(\"Blue water footprint mm/yr\")" ] }, { @@ -1271,7 +1271,7 @@ } ], "source": [ - "#get the total blue water footprint per area\n", + "# get the total blue water footprint per area\n", "!gdal_calc.py --calc \"A*10000*10000\" --format GTiff --type Float32 --NoDataValue 0.0 -A $path'wf_bltot_mmyr/hdr.adf' --A_band 1 --outfile $path'bl_wf_mmyr_area.tif'" ] }, @@ -1344,7 +1344,7 @@ } ], "source": [ - "#get the area in india\n", + "# get the area in india\n", "!gdal_calc.py --calc \"A*B\" --format GTiff --type Float32 --NoDataValue 0.0 -A $path'gadm36_IND_0.tif' --A_band 1 -B $path'bl_wf_mmyr_area.tif' --outfile $path'IND_bl_wf_mmyr_area.tif'" ] }, @@ -1419,15 +1419,17 @@ ], "source": [ "src_india_area_bl = rio.open(path + \"IND_bl_wf_mmyr_area.tif\")\n", - "print('shape:',src_india_area_bl.shape)\n", - "print('noData:',src_india_area_bl.nodata)\n", + "print(\"shape:\", src_india_area_bl.shape)\n", + "print(\"noData:\", src_india_area_bl.nodata)\n", "\n", "src_india_area_array_bl = src_india_area_bl.read()\n", - "#remove nans that appear outside boundary for extent\n", + "# remove nans that appear outside boundary for extent\n", "src_india_area_array_nonan_bl = src_india_area_array_bl[~numpy.isnan(src_india_area_array_bl)]\n", "\n", - "print(f'GLOBAL Total sum excluding no data: {src_india_area_array_nonan_bl.sum()} mm/yr')\n", - "print(f'GLOBAL Total sum excluding no data: {src_india_area_array_nonan_bl.sum()*0.001/1000000000} m3/yr')" + "print(f\"GLOBAL Total sum excluding no data: {src_india_area_array_nonan_bl.sum()} mm/yr\")\n", + "print(\n", + " f\"GLOBAL Total sum excluding no data: {src_india_area_array_nonan_bl.sum()*0.001/1000000000} m3/yr\"\n", + ")" ] }, { @@ -1446,17 +1448,15 @@ } ], "source": [ - "\n", - "\n", - "#compute difference\n", - "value = 243 \n", + "# compute difference\n", + "value = 243\n", "value_es = round(304.31571345408)\n", "\n", "difference_abs = value - value_es\n", - "print(f'absolute difference: {difference_abs}')\n", + "print(f\"absolute difference: {difference_abs}\")\n", "\n", - "difference_rel = (value-value_es)/value\n", - "print(f'relative difference: {difference_rel}')" + "difference_rel = (value - value_es) / value\n", + "print(f\"relative difference: 
{difference_rel}\")" ] }, { diff --git a/data/notebooks/Lab/Satelligence_data_ingestion.ipynb b/data/notebooks/Lab/Satelligence_data_ingestion.ipynb index 9d6e94af1..3e264b8ab 100644 --- a/data/notebooks/Lab/Satelligence_data_ingestion.ipynb +++ b/data/notebooks/Lab/Satelligence_data_ingestion.ipynb @@ -31,27 +31,15 @@ }, "outputs": [], "source": [ - "#from psycopg2.pool import ThreadedConnectionPool\n", + "# from psycopg2.pool import ThreadedConnectionPool\n", "import geopandas as gpd\n", - "import pandas as pd\n", - "\n", - "import folium\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import h3\n", - "import h3pandas\n", - "from h3ronpy import raster\n", "import rasterio as rio\n", - "from rasterstats import gen_zonal_stats, gen_point_query, zonal_stats\n", - "\n", - "\n", - "\n", + "from h3ronpy import raster\n", + "from rasterstats import gen_point_query, zonal_stats\n", "\n", - "import mercantile as mt\n", - "from shapely.geometry import shape, Polygon, LineString\n", - "#import numpy as np\n", + "# import numpy as np\n", "\n", - "#import statistics" + "# import statistics" ] }, { @@ -61,32 +49,38 @@ "metadata": {}, "outputs": [], "source": [ - "def convert_raster_h3(raster_path, vector_path, column='deforestat', resolution=6):\n", + "def convert_raster_h3(raster_path, vector_path, column=\"deforestat\", resolution=6):\n", " with rio.open(raster_path) as src:\n", - " gdf = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=resolution, nodata_value=src.profile['nodata'], compacted=False)\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1),\n", + " src.transform,\n", + " h3_resolution=resolution,\n", + " nodata_value=src.profile[\"nodata\"],\n", + " compacted=False,\n", + " )\n", "\n", - " gdf.plot('value')\n", - " gdf['h3index'] = gdf['h3index'].apply(hex)\n", + " gdf.plot(\"value\")\n", + " gdf[\"h3index\"] = gdf[\"h3index\"].apply(hex)\n", "\n", " gdf_vector = gpd.read_file(vector_path)\n", - " clean_gdf = gdf_vector[['gfw_fid',column,'geometry']]\n", - " \n", + " clean_gdf = gdf_vector[[\"gfw_fid\", column, \"geometry\"]]\n", + "\n", " _sum_calculated = []\n", " for i, row in clean_gdf.iterrows():\n", - " filtered_gdf = clean_gdf[i:i+1]\n", - " #convert to h3\n", + " filtered_gdf = clean_gdf[i : i + 1]\n", + " # convert to h3\n", " h3_gdf = filtered_gdf.h3.polyfill_resample(resolution)\n", - " #h3_gdf = h3_gdf.reset_index().rename(columns={'h3_polyfill':'h3index'})\n", - " h3index_list = [f'0x{h3index}' for h3index in h3_gdf.index]\n", - " #filter gdf by list and get value\n", - " _sum = gdf[gdf['h3index'].isin(h3index_list)]['value'].sum()\n", + " # h3_gdf = h3_gdf.reset_index().rename(columns={'h3_polyfill':'h3index'})\n", + " h3index_list = [f\"0x{h3index}\" for h3index in h3_gdf.index]\n", + " # filter gdf by list and get value\n", + " _sum = gdf[gdf[\"h3index\"].isin(h3index_list)][\"value\"].sum()\n", " _sum_calculated.append(_sum)\n", - " \n", - " clean_gdf['estimate'] = _sum_calculated\n", + "\n", + " clean_gdf[\"estimate\"] = _sum_calculated\n", " return clean_gdf\n", "\n", "\n", - "def buffer_stats(raster_path, vector_path, buffer=50000, stat_='sum'):\n", + "def buffer_stats(raster_path, vector_path, buffer=50000, stat_=\"sum\"):\n", " \"\"\"\n", " inputs:\n", " -------------\n", @@ -94,42 +88,36 @@ " vector_path: path to point file in EPSG:4326\n", " buffer: distance in metres for coputing the buffer\n", " stats: stadistics to compute\n", - " \n", + "\n", " output\n", " -------\n", " array with statistics\"\"\"\n", - " \n", - " 
#open vector file\n", + "\n", + " # open vector file\n", " gdf = gpd.read_file(vector_path)\n", - " #check projection\n", - " #if gdf.crs != True:\n", + " # check projection\n", + " # if gdf.crs != True:\n", " # print(gdf.crs)\n", " # #project\n", " # print('Dataset missing projection. Please assign one!')\n", - " if gdf.crs and gdf.crs == 'EPSG:4326':\n", - " #reproject\n", - " gdf_3857 = gdf.to_crs('EPSG:3857')\n", + " if gdf.crs and gdf.crs == \"EPSG:4326\":\n", + " # reproject\n", + " gdf_3857 = gdf.to_crs(\"EPSG:3857\")\n", " ## TODO:add other validations\n", - " \n", "\n", - " #get buffer\n", + " # get buffer\n", " gdf_3857_buffer = gdf_3857.buffer(buffer)\n", - " #reproject back to epsg4326\n", - " gdf_4326_buffer = gdf_3857_buffer.to_crs('EPSG:4326')\n", - " #get statistics\n", + " # reproject back to epsg4326\n", + " gdf_4326_buffer = gdf_3857_buffer.to_crs(\"EPSG:4326\")\n", + " # get statistics\n", " vizz_stats = []\n", " for geom in gdf_4326_buffer:\n", - " stats = zonal_stats(geom,\n", - " raster_path,\n", - " stats=stat_,\n", - " all_touched = True\n", - " )\n", - " stat_sum = stats[0]['sum']\n", + " stats = zonal_stats(geom, raster_path, stats=stat_, all_touched=True)\n", + " stat_sum = stats[0][\"sum\"]\n", " vizz_stats.append(stat_sum)\n", - " #add stats in dataframe\n", - " gdf['estimated']=vizz_stats\n", - " return gdf\n", - " " + " # add stats in dataframe\n", + " gdf[\"estimated\"] = vizz_stats\n", + " return gdf" ] }, { @@ -443,9 +431,10 @@ "source": [ "gdf = buffer_stats(\n", " \"../../datasets/processed/Satelligence_data/test_rasters_2/Deforestation_2021_2022_area_ha.tif\",\n", - " '../../datasets/processed/palm_oil_mills/satelligence_mills_4326_point.shp',\n", + " \"../../datasets/processed/palm_oil_mills/satelligence_mills_4326_point.shp\",\n", " buffer=50000,\n", - " stat_='sum')\n", + " stat_=\"sum\",\n", + ")\n", "gdf.head()" ] }, @@ -582,9 +571,11 @@ } ], "source": [ - "gdf_sat = gpd.read_file('../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp')\n", + "gdf_sat = gpd.read_file(\n", + " \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\"\n", + ")\n", "\n", - "clean_sat_mills = gdf_sat[['gfw_fid','deforestat','biodiversi','carbon','geometry']]\n", + "clean_sat_mills = gdf_sat[[\"gfw_fid\", \"deforestat\", \"biodiversi\", \"carbon\", \"geometry\"]]\n", "clean_sat_mills.head()" ] }, @@ -715,10 +706,12 @@ } ], "source": [ - "gdf = convert_raster_h3(\"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_areaSum_ha.tif\", \n", - " '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp',\n", - " column='deforestat',\n", - " resolution=6)\n", + "gdf = convert_raster_h3(\n", + " \"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_areaSum_ha.tif\",\n", + " \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\",\n", + " column=\"deforestat\",\n", + " resolution=6,\n", + ")\n", "gdf.head()" ] }, @@ -859,10 +852,12 @@ } ], "source": [ - "gdf = convert_raster_h3(\"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_areaSum_ha.tif\", \n", - " '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp',\n", - " column='deforestat',\n", - " resolution=5)\n", + "gdf = convert_raster_h3(\n", + " 
\"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_areaSum_ha.tif\",\n", + " \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\",\n", + " column=\"deforestat\",\n", + " resolution=5,\n", + ")\n", "gdf.head()" ] }, @@ -1077,12 +1072,14 @@ } ], "source": [ - "gdf = convert_raster_h3(\"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Sum_count.tif\", \n", - " '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp',\n", - " column='deforestat',\n", - " resolution=6)\n", - "_estimated = [el*6.69019042035408517*6.69019042035408517*0.0001 for el in gdf['estimate']]\n", - "gdf['estimate']=_estimated\n", + "gdf = convert_raster_h3(\n", + " \"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Sum_count.tif\",\n", + " \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\",\n", + " column=\"deforestat\",\n", + " resolution=6,\n", + ")\n", + "_estimated = [el * 6.69019042035408517 * 6.69019042035408517 * 0.0001 for el in gdf[\"estimate\"]]\n", + "gdf[\"estimate\"] = _estimated\n", "gdf.head()" ] }, @@ -1239,12 +1236,14 @@ } ], "source": [ - "gdf = convert_raster_h3(\"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\", \n", - " '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp',\n", - " column='deforestat',\n", - " resolution=6)\n", - "_estimated = [el*3612.9 for el in gdf['estimate']]\n", - "gdf['estimate']=_estimated\n", + "gdf = convert_raster_h3(\n", + " \"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\",\n", + " \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\",\n", + " column=\"deforestat\",\n", + " resolution=6,\n", + ")\n", + "_estimated = [el * 3612.9 for el in gdf[\"estimate\"]]\n", + "gdf[\"estimate\"] = _estimated\n", "gdf.head()" ] }, @@ -1385,12 +1384,14 @@ } ], "source": [ - "gdf = convert_raster_h3(\"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\", \n", - " '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp',\n", - " column='deforestat',\n", - " resolution=5)\n", - "_estimated = [el*25290.33 for el in gdf['estimate']]\n", - "gdf['estimate']=_estimated\n", + "gdf = convert_raster_h3(\n", + " \"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\",\n", + " \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\",\n", + " column=\"deforestat\",\n", + " resolution=5,\n", + ")\n", + "_estimated = [el * 25290.33 for el in gdf[\"estimate\"]]\n", + "gdf[\"estimate\"] = _estimated\n", "gdf.head()" ] }, @@ -1484,7 +1485,9 @@ } ], "source": [ - "centroids_gdf = gpd.read_file('../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Sum_count_h3/centroids.shp')\n", + "centroids_gdf = gpd.read_file(\n", + " \"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Sum_count_h3/centroids.shp\"\n", + ")\n", "centroids_gdf.head()" ] }, @@ -1495,7 +1498,9 @@ "metadata": {}, "outputs": [], "source": [ - "src_raster = \"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\"" + "src_raster = (\n", + " 
\"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\"\n", + ")" ] }, { @@ -1508,10 +1513,10 @@ "gpq_stats_h3_centroid = gen_point_query(\n", " centroids_gdf,\n", " src_raster,\n", - " interpolate = 'bilinear',\n", - " property_name = 'bilinear_stat',\n", + " interpolate=\"bilinear\",\n", + " property_name=\"bilinear_stat\",\n", " geojson_out=True,\n", - " )" + ")" ] }, { @@ -1523,12 +1528,9 @@ "source": [ "h3_point_stats = []\n", "for gen in gpq_stats_h3_centroid:\n", - " h3index= gen['properties']['h3index']\n", - " value = gen['properties']['bilinear_stat']\n", - " h3_point_stats.append({\n", - " 'h3index':h3index,\n", - " 'value':value\n", - " })" + " h3index = gen[\"properties\"][\"h3index\"]\n", + " value = gen[\"properties\"][\"bilinear_stat\"]\n", + " h3_point_stats.append({\"h3index\": h3index, \"value\": value})" ] }, { @@ -1618,16 +1620,16 @@ "metadata": {}, "outputs": [], "source": [ - "#get values for each mill\n", + "# get values for each mill\n", "def_sum_calculated = []\n", "for i, row in clean_sat_mills.iterrows():\n", - " filtered_gdf = clean_sat_mills[i:i+1]\n", - " #convert to h3\n", + " filtered_gdf = clean_sat_mills[i : i + 1]\n", + " # convert to h3\n", " h3_gdf = filtered_gdf.h3.polyfill_resample(6)\n", - " h3index_list = [f'0x{h3index}' for h3index in h3_gdf.index]\n", - " #filter gdf by list and get value\n", - " #3617 is the area of the hexagon for res6 in \n", - " def_sum = gdf_pointStats[gdf_pointStats['h3index'].isin(h3index_list)]['value'].sum()*3612.9\n", + " h3index_list = [f\"0x{h3index}\" for h3index in h3_gdf.index]\n", + " # filter gdf by list and get value\n", + " # 3617 is the area of the hexagon for res6 in\n", + " def_sum = gdf_pointStats[gdf_pointStats[\"h3index\"].isin(h3index_list)][\"value\"].sum() * 3612.9\n", " def_sum_calculated.append(def_sum)" ] }, @@ -1813,7 +1815,7 @@ } ], "source": [ - "clean_sat_mills['vizz_pointStat_bilinear_res6'] = def_sum_calculated\n", + "clean_sat_mills[\"vizz_pointStat_bilinear_res6\"] = def_sum_calculated\n", "clean_sat_mills.head()" ] }, @@ -1824,7 +1826,9 @@ "metadata": {}, "outputs": [], "source": [ - "clean_sat_mills.to_csv('../../datasets/processed/Satelligence_data/Deforestation_comparative_sat_vizz_centroid.csv')" + "clean_sat_mills.to_csv(\n", + " \"../../datasets/processed/Satelligence_data/Deforestation_comparative_sat_vizz_centroid.csv\"\n", + ")" ] }, { @@ -1859,7 +1863,7 @@ } ], "source": [ - "#download carbon layer to same resolution as deforestation area file\n", + "# download carbon layer to same resolution as deforestation area file\n", "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -dstnodata 0.0 -tr 6e-05 6e-05 -r near -te 94.99998 2.1 98.29998 6.10002 -te_srs EPSG:4326 -multi -of GTiff '../../datasets/processed/Satelligence_data/carbon_analysis/AboveGroundBiomass_GLO_2001-01-01-2002-01-01.tif' '../../datasets/processed/Satelligence_data/carbon_analysis/AboveGroundBiomass_GLO_2001-01-01-2002-01-01_downsample.tif'" ] }, @@ -2018,7 +2022,7 @@ } ], "source": [ - "#multiply the two rasters using gdal calc\n", + "# multiply the two rasters using gdal calc\n", "!gdal_calc.py --calc \"A*B\" --format GTiff --type Float32 -A '../../datasets/processed/Satelligence_data/carbon_analysis/AboveGroundBiomass_GLO_2001-01-01-2002-01-01_downsample.tif' --A_band 1 -B '../../datasets/processed/Satelligence_data/test_rasters_2/Deforestation_2021_2022_area_ha.tif' --outfile '../../datasets/processed/Satelligence_data/carbon_analysis/carbon_loss_T_downsample.tif'" ] }, @@ 
-2251,10 +2255,11 @@ ], "source": [ "gdf = buffer_stats(\n", - " '../../datasets/processed/Satelligence_data/carbon_analysis/carbon_loss_T_downsample.tif',\n", - " '../../datasets/processed/palm_oil_mills/satelligence_mills_4326_point.shp',\n", + " \"../../datasets/processed/Satelligence_data/carbon_analysis/carbon_loss_T_downsample.tif\",\n", + " \"../../datasets/processed/palm_oil_mills/satelligence_mills_4326_point.shp\",\n", " buffer=50000,\n", - " stat_='sum')\n", + " stat_=\"sum\",\n", + ")\n", "gdf.head()" ] }, @@ -2265,8 +2270,8 @@ "metadata": {}, "outputs": [], "source": [ - "#export to csv\n", - "gdf.to_csv('../../datasets/processed/Satelligence_data/carbon_analysis/carbon_loss_T_stats_v2.csv')" + "# export to csv\n", + "gdf.to_csv(\"../../datasets/processed/Satelligence_data/carbon_analysis/carbon_loss_T_stats_v2.csv\")" ] }, { @@ -2417,10 +2422,12 @@ } ], "source": [ - "gdf_res6 = convert_raster_h3('../../datasets/processed/Satelligence_data/carbon_analysis/carbon_loss_T_sum_upsample.tif', \n", - " '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp',\n", - " column='carbon',\n", - " resolution=6)\n", + "gdf_res6 = convert_raster_h3(\n", + " \"../../datasets/processed/Satelligence_data/carbon_analysis/carbon_loss_T_sum_upsample.tif\",\n", + " \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\",\n", + " column=\"carbon\",\n", + " resolution=6,\n", + ")\n", "gdf_res6.head()" ] }, @@ -2543,10 +2550,12 @@ } ], "source": [ - "gdf_res5 = convert_raster_h3('../../datasets/processed/Satelligence_data/carbon_analysis/carbon_loss_T_sum_upsample.tif', \n", - " '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp',\n", - " column='carbon',\n", - " resolution=5)\n", + "gdf_res5 = convert_raster_h3(\n", + " \"../../datasets/processed/Satelligence_data/carbon_analysis/carbon_loss_T_sum_upsample.tif\",\n", + " \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\",\n", + " column=\"carbon\",\n", + " resolution=5,\n", + ")\n", "\n", "gdf_res5.head()" ] @@ -2576,7 +2585,6 @@ } ], "source": [ - "\n", "## upsample the carbon_loss_T for ingesting\n", "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -dstnodata 0.0 -tr 0.0833333333333286 0.0833333333333286 -r sum -multi -of GTiff '../../datasets/processed/Satelligence_data/carbon_analysis/AboveGroundBiomass_GLO_2001-01-01-2002-01-01.tif' '../../datasets/processed/Satelligence_data/carbon_analysis/AboveGroundBiomass_GLO_2001-01-01-2002-01-01_upsample.tif'" ] @@ -2682,11 +2690,19 @@ } ], "source": [ - "raster_path=\"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\"\n", + "raster_path = (\n", + " \"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\"\n", + ")\n", "with rio.open(raster_path) as src:\n", - " gdf_densres6 = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=6, nodata_value=src.profile['nodata'], compacted=False)\n", - " gdf_densres6.plot('value')\n", - " gdf_densres6['h3index'] = gdf_densres6['h3index'].apply(hex)\n", + " gdf_densres6 = raster.raster_to_geodataframe(\n", + " src.read(1),\n", + " src.transform,\n", + " h3_resolution=6,\n", + " nodata_value=src.profile[\"nodata\"],\n", + " compacted=False,\n", + " )\n", + " gdf_densres6.plot(\"value\")\n", + " gdf_densres6[\"h3index\"] = gdf_densres6[\"h3index\"].apply(hex)\n", "gdf_densres6.head()" ] 
}, @@ -2784,11 +2800,13 @@ } ], "source": [ - "raster_path='../../datasets/processed/Satelligence_data/carbon_analysis/AboveGroundBiomass_GLO_2001-01-01-2002-01-01_upsample.tif'\n", + "raster_path = \"../../datasets/processed/Satelligence_data/carbon_analysis/AboveGroundBiomass_GLO_2001-01-01-2002-01-01_upsample.tif\"\n", "with rio.open(raster_path) as src:\n", - " gdf_carbres6 = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=6, compacted=False)\n", - " gdf_carbres6.plot('value')\n", - " gdf_carbres6['h3index'] = gdf_carbres6['h3index'].apply(hex)\n", + " gdf_carbres6 = raster.raster_to_geodataframe(\n", + " src.read(1), src.transform, h3_resolution=6, compacted=False\n", + " )\n", + " gdf_carbres6.plot(\"value\")\n", + " gdf_carbres6[\"h3index\"] = gdf_carbres6[\"h3index\"].apply(hex)\n", "gdf_carbres6.head()" ] }, @@ -2900,11 +2918,7 @@ } ], "source": [ - "merge = gdf_densres6.merge(\n", - " gdf_carbres6,\n", - " on='h3index',\n", - " how='inner'\n", - ")\n", + "merge = gdf_densres6.merge(gdf_carbres6, on=\"h3index\", how=\"inner\")\n", "merge.head()" ] }, @@ -2986,17 +3000,17 @@ "source": [ "h3index_array = []\n", "value_array = []\n", - "for i,row in merge.iterrows():\n", - " density = row['value_x']\n", - " carbon = row['value_y']\n", - " h3index=row['h3index']\n", - " value = density*carbon*3612.9\n", + "for i, row in merge.iterrows():\n", + " density = row[\"value_x\"]\n", + " carbon = row[\"value_y\"]\n", + " h3index = row[\"h3index\"]\n", + " value = density * carbon * 3612.9\n", " value_array.append(value)\n", " h3index_array.append(h3index)\n", "\n", "gdf_estimate = gpd.GeoDataFrame()\n", - "gdf_estimate['h3index']=h3index_array\n", - "gdf_estimate['value']=value_array\n", + "gdf_estimate[\"h3index\"] = h3index_array\n", + "gdf_estimate[\"value\"] = value_array\n", "gdf_estimate.head()" ] }, @@ -3009,13 +3023,13 @@ "source": [ "carb_sum_calculated = []\n", "for i, row in clean_sat_mills.iterrows():\n", - " filtered_gdf = clean_sat_mills[i:i+1]\n", - " #convert to h3\n", + " filtered_gdf = clean_sat_mills[i : i + 1]\n", + " # convert to h3\n", " h3_gdf = filtered_gdf.h3.polyfill_resample(6)\n", - " h3index_list = [f'0x{h3index}' for h3index in h3_gdf.index]\n", - " #filter gdf by list and get value\n", - " #3617 is the area of the hexagon for res6 in \n", - " carb_sum = gdf_estimate[gdf_estimate['h3index'].isin(h3index_list)]['value'].sum()\n", + " h3index_list = [f\"0x{h3index}\" for h3index in h3_gdf.index]\n", + " # filter gdf by list and get value\n", + " # 3617 is the area of the hexagon for res6 in\n", + " carb_sum = gdf_estimate[gdf_estimate[\"h3index\"].isin(h3index_list)][\"value\"].sum()\n", " carb_sum_calculated.append(carb_sum)" ] }, @@ -3365,9 +3379,9 @@ } ], "source": [ - "clean_sat_mills['ddbb_calc_res6']=carb_sum_calculated\n", - "clean_sat_mills['res6_precalc']=list(gdf_res6['estimate'])\n", - "clean_sat_mills['res5_precalc']=list(gdf_res5['estimate'])\n", + "clean_sat_mills[\"ddbb_calc_res6\"] = carb_sum_calculated\n", + "clean_sat_mills[\"res6_precalc\"] = list(gdf_res6[\"estimate\"])\n", + "clean_sat_mills[\"res5_precalc\"] = list(gdf_res5[\"estimate\"])\n", "clean_sat_mills.head()" ] }, @@ -3378,7 +3392,9 @@ "metadata": {}, "outputs": [], "source": [ - "clean_sat_mills.to_csv('../../datasets/processed/Satelligence_data/carbon_analysis/carbon_loss_stats.csv')" + "clean_sat_mills.to_csv(\n", + " \"../../datasets/processed/Satelligence_data/carbon_analysis/carbon_loss_stats.csv\"\n", + ")" ] }, { @@ -3414,7 +3430,7 @@ } ], "source": [ - 
"#reclasify raster:\n", + "# reclasify raster:\n", "!gdal_calc.py --calc \"(A>=264)*A\" --format GTiff --type Float32 --NoDataValue 0.0 -A '../../datasets/processed/Satelligence_data/biodiversity_analysis/SpeciesRichness_IDN_2021-01-01-2022-01-01.tif' --A_band 1 --outfile '../../datasets/processed/Satelligence_data/biodiversity_analysis/SpeciesRichness_high_density.tif'" ] }, @@ -3456,7 +3472,7 @@ } ], "source": [ - "#downsample\n", + "# downsample\n", "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -dstnodata 0.0 -tr 6e-05 6e-05 -r near -te 94.99998 2.1 98.29998 6.10002 -te_srs EPSG:4326 -multi -of GTiff '../../datasets/processed/Satelligence_data/biodiversity_analysis/SpeciesRichness_high_density_mask.tif' '../../datasets/processed/Satelligence_data/biodiversity_analysis/SpeciesRichness_high_density_mask_downsample_v2.tif'" ] }, @@ -3615,7 +3631,7 @@ } ], "source": [ - "#multiply deforested area time high density of species mask\n", + "# multiply deforested area time high density of species mask\n", "!gdal_calc.py --calc \"A*B\" --format GTiff --type Float32 -A '../../datasets/processed/Satelligence_data/biodiversity_analysis/SpeciesRichness_high_density_mask_downsample_v2.tif' --A_band 1 -B '../../datasets/processed/Satelligence_data/test_rasters_2/Deforestation_2021_2022_area_ha.tif' --outfile '../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_loss_ha.tif'" ] }, @@ -3841,10 +3857,11 @@ ], "source": [ "gdf = buffer_stats(\n", - " '../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_loss_ha.tif',\n", - " '../../datasets/processed/palm_oil_mills/satelligence_mills_4326_point.shp',\n", + " \"../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_loss_ha.tif\",\n", + " \"../../datasets/processed/palm_oil_mills/satelligence_mills_4326_point.shp\",\n", " buffer=50000,\n", - " stat_='sum')\n", + " stat_=\"sum\",\n", + ")\n", "gdf.head()" ] }, @@ -3855,8 +3872,10 @@ "metadata": {}, "outputs": [], "source": [ - "#export to csv\n", - "gdf.to_csv('../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_loss_ha.csv')" + "# export to csv\n", + "gdf.to_csv(\n", + " \"../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_loss_ha.csv\"\n", + ")" ] }, { @@ -3876,7 +3895,7 @@ } ], "source": [ - "#upsample biodiversity raster\n", + "# upsample biodiversity raster\n", "## upsample the carbon_loss_T for ingesting\n", "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -dstnodata 0.0 -tr 0.0833333333333286 0.0833333333333286 -r sum -multi -of GTiff '../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_loss_ha.tif' '../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_loss_ha_upsample.tif'" ] @@ -4008,10 +4027,12 @@ } ], "source": [ - "gdf_res6 = convert_raster_h3('../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_loss_ha_upsample.tif', \n", - " '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp',\n", - " column='biodiversi',\n", - " resolution=6)\n", + "gdf_res6 = convert_raster_h3(\n", + " \"../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_loss_ha_upsample.tif\",\n", + " \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\",\n", + " column=\"biodiversi\",\n", + " resolution=6,\n", + ")\n", "\n", "gdf_res6.head()" ] @@ -4143,10 +4164,12 @@ } ], "source": [ - "gdf_res5 = 
convert_raster_h3('../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_loss_ha_upsample.tif', \n", - " '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp',\n", - " column='biodiversi',\n", - " resolution=5)\n", + "gdf_res5 = convert_raster_h3(\n", + " \"../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_loss_ha_upsample.tif\",\n", + " \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\",\n", + " column=\"biodiversi\",\n", + " resolution=5,\n", + ")\n", "\n", "gdf_res5.head()" ] @@ -4281,11 +4304,19 @@ } ], "source": [ - "raster_path=\"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\"\n", + "raster_path = (\n", + " \"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\"\n", + ")\n", "with rio.open(raster_path) as src:\n", - " gdf_densres6 = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=6, nodata_value=src.profile['nodata'], compacted=False)\n", - " gdf_densres6.plot('value')\n", - " gdf_densres6['h3index'] = gdf_densres6['h3index'].apply(hex)\n", + " gdf_densres6 = raster.raster_to_geodataframe(\n", + " src.read(1),\n", + " src.transform,\n", + " h3_resolution=6,\n", + " nodata_value=src.profile[\"nodata\"],\n", + " compacted=False,\n", + " )\n", + " gdf_densres6.plot(\"value\")\n", + " gdf_densres6[\"h3index\"] = gdf_densres6[\"h3index\"].apply(hex)\n", "gdf_densres6.head()" ] }, @@ -4383,11 +4414,17 @@ } ], "source": [ - "raster_path='../../datasets/processed/Satelligence_data/biodiversity_analysis/SpeciesRichness_high_density_mask_upsample.tif'\n", + "raster_path = \"../../datasets/processed/Satelligence_data/biodiversity_analysis/SpeciesRichness_high_density_mask_upsample.tif\"\n", "with rio.open(raster_path) as src:\n", - " gdf_speciesres6 = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=6, nodata_value=src.profile['nodata'], compacted=False)\n", - " gdf_speciesres6.plot('value')\n", - " gdf_speciesres6['h3index'] = gdf_speciesres6['h3index'].apply(hex)\n", + " gdf_speciesres6 = raster.raster_to_geodataframe(\n", + " src.read(1),\n", + " src.transform,\n", + " h3_resolution=6,\n", + " nodata_value=src.profile[\"nodata\"],\n", + " compacted=False,\n", + " )\n", + " gdf_speciesres6.plot(\"value\")\n", + " gdf_speciesres6[\"h3index\"] = gdf_speciesres6[\"h3index\"].apply(hex)\n", "gdf_speciesres6.head()" ] }, @@ -4499,11 +4536,7 @@ } ], "source": [ - "merge = gdf_densres6.merge(\n", - " gdf_speciesres6,\n", - " on='h3index',\n", - " how='inner'\n", - ")\n", + "merge = gdf_densres6.merge(gdf_speciesres6, on=\"h3index\", how=\"inner\")\n", "merge.head()" ] }, @@ -4585,16 +4618,16 @@ "source": [ "h3index_array = []\n", "value_array = []\n", - "for i,row in merge.iterrows():\n", - " density = row['value_x']\n", - " h3index=row['h3index']\n", - " value = density*3612.9\n", + "for i, row in merge.iterrows():\n", + " density = row[\"value_x\"]\n", + " h3index = row[\"h3index\"]\n", + " value = density * 3612.9\n", " value_array.append(value)\n", " h3index_array.append(h3index)\n", "\n", "gdf_estimate = gpd.GeoDataFrame()\n", - "gdf_estimate['h3index']=h3index_array\n", - "gdf_estimate['value']=value_array\n", + "gdf_estimate[\"h3index\"] = h3index_array\n", + "gdf_estimate[\"value\"] = value_array\n", "gdf_estimate.head()" ] }, @@ -4711,13 +4744,13 @@ "source": [ "bio_sum_calculated = []\n", "for i, row in 
clean_sat_mills.iterrows():\n", - " filtered_gdf = clean_sat_mills[i:i+1]\n", - " #convert to h3\n", + " filtered_gdf = clean_sat_mills[i : i + 1]\n", + " # convert to h3\n", " h3_gdf = filtered_gdf.h3.polyfill_resample(6)\n", - " h3index_list = [f'0x{h3index}' for h3index in h3_gdf.index]\n", - " #filter gdf by list and get value\n", - " #3617 is the area of the hexagon for res6 in \n", - " bio_sum = gdf_estimate[gdf_estimate['h3index'].isin(h3index_list)]['value'].sum()\n", + " h3index_list = [f\"0x{h3index}\" for h3index in h3_gdf.index]\n", + " # filter gdf by list and get value\n", + " # 3617 is the area of the hexagon for res6 in\n", + " bio_sum = gdf_estimate[gdf_estimate[\"h3index\"].isin(h3index_list)][\"value\"].sum()\n", " bio_sum_calculated.append(bio_sum)" ] }, @@ -4859,10 +4892,9 @@ } ], "source": [ - "\n", - "clean_sat_mills['ddbb_calc_res6']=bio_sum_calculated\n", - "clean_sat_mills['res6_precalc']=list(gdf_res6['estimate'])\n", - "clean_sat_mills['res5_precalc']=list(gdf_res5['estimate'])\n", + "clean_sat_mills[\"ddbb_calc_res6\"] = bio_sum_calculated\n", + "clean_sat_mills[\"res6_precalc\"] = list(gdf_res6[\"estimate\"])\n", + "clean_sat_mills[\"res5_precalc\"] = list(gdf_res5[\"estimate\"])\n", "clean_sat_mills.head()" ] }, @@ -4873,7 +4905,9 @@ "metadata": {}, "outputs": [], "source": [ - "clean_sat_mills.to_csv('../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_stats_h3.csv')" + "clean_sat_mills.to_csv(\n", + " \"../../datasets/processed/Satelligence_data/biodiversity_analysis/biodiversity_stats_h3.csv\"\n", + ")" ] }, { @@ -4896,28 +4930,34 @@ "metadata": {}, "outputs": [], "source": [ - "def convert_raster_h3(raster_path, vector_path, column='deforestat', resolution=6):\n", + "def convert_raster_h3(raster_path, vector_path, column=\"deforestat\", resolution=6):\n", " with rio.open(raster_path) as src:\n", - " gdf = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=resolution, nodata_value=src.profile['nodata'], compacted=False)\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1),\n", + " src.transform,\n", + " h3_resolution=resolution,\n", + " nodata_value=src.profile[\"nodata\"],\n", + " compacted=False,\n", + " )\n", "\n", - " gdf.plot('value')\n", - " gdf['h3index'] = gdf['h3index'].apply(hex)\n", + " gdf.plot(\"value\")\n", + " gdf[\"h3index\"] = gdf[\"h3index\"].apply(hex)\n", "\n", " gdf_vector = gpd.read_file(vector_path)\n", - " clean_gdf = gdf_vector[['gfw_fid',column,'geometry']]\n", - " \n", + " clean_gdf = gdf_vector[[\"gfw_fid\", column, \"geometry\"]]\n", + "\n", " _sum_calculated = []\n", " for i, row in clean_gdf.iterrows():\n", - " filtered_gdf = clean_gdf[i:i+1]\n", - " #convert to h3\n", + " filtered_gdf = clean_gdf[i : i + 1]\n", + " # convert to h3\n", " h3_gdf = filtered_gdf.h3.polyfill_resample(resolution)\n", - " #h3_gdf = h3_gdf.reset_index().rename(columns={'h3_polyfill':'h3index'})\n", - " h3index_list = [f'0x{h3index}' for h3index in h3_gdf.index]\n", - " #filter gdf by list and get value\n", - " _sum = gdf[gdf['h3index'].isin(h3index_list)]['value'].sum()\n", + " # h3_gdf = h3_gdf.reset_index().rename(columns={'h3_polyfill':'h3index'})\n", + " h3index_list = [f\"0x{h3index}\" for h3index in h3_gdf.index]\n", + " # filter gdf by list and get value\n", + " _sum = gdf[gdf[\"h3index\"].isin(h3index_list)][\"value\"].sum()\n", " _sum_calculated.append(_sum)\n", - " \n", - " clean_gdf['estimate'] = _sum_calculated\n", + "\n", + " clean_gdf[\"estimate\"] = _sum_calculated\n", " 
return clean_gdf" ] }, @@ -4928,12 +4968,14 @@ "metadata": {}, "outputs": [], "source": [ - "gdf = convert_raster_h3(\"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\", \n", - " '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp',\n", - " column='deforestat',\n", - " resolution=6)\n", - "_estimated = [el*3612.9 for el in gdf['estimate']]\n", - "gdf['estimate']=_estimated\n", + "gdf = convert_raster_h3(\n", + " \"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\",\n", + " \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\",\n", + " column=\"deforestat\",\n", + " resolution=6,\n", + ")\n", + "_estimated = [el * 3612.9 for el in gdf[\"estimate\"]]\n", + "gdf[\"estimate\"] = _estimated\n", "gdf.head()" ] }, @@ -4944,11 +4986,11 @@ "metadata": {}, "outputs": [], "source": [ - "#raster to translate\n", - "#sat_def_raster = \"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\"\n", + "# raster to translate\n", + "# sat_def_raster = \"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_Density.tif\"\n", "sat_def_raster = \"../../datasets/processed/Satelligence_data/Deforestation_IDN_2021-01-01-2022-01-01_areaSum_ha.tif\"\n", "\n", - "mills_locations = '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp'\n", + "mills_locations = \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\"\n", "resolution = 6" ] }, @@ -5053,12 +5095,18 @@ } ], "source": [ - "#translate raster to h3\n", + "# translate raster to h3\n", "with rio.open(sat_def_raster) as src:\n", - " gdf = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=resolution, nodata_value=src.profile['nodata'], compacted=False)\n", + " gdf = raster.raster_to_geodataframe(\n", + " src.read(1),\n", + " src.transform,\n", + " h3_resolution=resolution,\n", + " nodata_value=src.profile[\"nodata\"],\n", + " compacted=False,\n", + " )\n", "\n", - " gdf.plot('value')\n", - " gdf['h3index'] = gdf['h3index'].apply(hex)\n", + " gdf.plot(\"value\")\n", + " gdf[\"h3index\"] = gdf[\"h3index\"].apply(hex)\n", "gdf.head()" ] }, @@ -5152,8 +5200,8 @@ ], "source": [ "gdf_vector = gpd.read_file(mills_locations)\n", - "#gdf_vector\n", - "clean_gdf = gdf_vector[['gfw_fid','deforestat','geometry']]\n", + "# gdf_vector\n", + "clean_gdf = gdf_vector[[\"gfw_fid\", \"deforestat\", \"geometry\"]]\n", "clean_gdf.head()" ] }, @@ -5266,18 +5314,18 @@ "source": [ "_sum_calculated = []\n", "for i, row in clean_gdf.iterrows():\n", - " filtered_gdf = clean_gdf[i:i+1]\n", - " #convert to h3\n", + " filtered_gdf = clean_gdf[i : i + 1]\n", + " # convert to h3\n", " h3_gdf = filtered_gdf.h3.polyfill_resample(resolution)\n", - " #h3_gdf = h3_gdf.reset_index().rename(columns={'h3_polyfill':'h3index'})\n", - " h3index_list = [f'0x{h3index}' for h3index in h3_gdf.index]\n", - " #filter gdf by list and get value\n", - " _sum = sum((gdf[gdf['h3index'].isin(h3index_list)]['value']).unique())\n", + " # h3_gdf = h3_gdf.reset_index().rename(columns={'h3_polyfill':'h3index'})\n", + " h3index_list = [f\"0x{h3index}\" for h3index in h3_gdf.index]\n", + " # filter gdf by list and get value\n", + " _sum = sum((gdf[gdf[\"h3index\"].isin(h3index_list)][\"value\"]).unique())\n", " _sum_calculated.append(_sum)\n", "\n", - 
"clean_gdf['estimate'] = _sum_calculated\n", - "_estimated = [el for el in clean_gdf['estimate']]\n", - "clean_gdf['estimate']=_estimated\n", + "clean_gdf[\"estimate\"] = _sum_calculated\n", + "_estimated = [el for el in clean_gdf[\"estimate\"]]\n", + "clean_gdf[\"estimate\"] = _estimated\n", "clean_gdf.head()" ] }, @@ -5288,7 +5336,9 @@ "metadata": {}, "outputs": [], "source": [ - "clean_gdf.to_csv('../../datasets/processed/Satelligence_data/deforestation_stats_areasum_unique_h3.csv')" + "clean_gdf.to_csv(\n", + " \"../../datasets/processed/Satelligence_data/deforestation_stats_areasum_unique_h3.csv\"\n", + ")" ] }, { diff --git a/data/notebooks/templates/init/initial_template.ipynb b/data/notebooks/templates/init/initial_template.ipynb deleted file mode 100644 index b327c56b9..000000000 --- a/data/notebooks/templates/init/initial_template.ipynb +++ /dev/null @@ -1,243 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Title" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The title of the notebook should be coherent with file name. The file name should be:\n", - "\n", - "progressive number_title.ipynb\n", - "\n", - "For example:\n", - "01_Data_Exploration.ipynb" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Purpose" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "State the purpose of the notebook." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Methodology" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Quickly describle assumptions and processing steps." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## WIP - improvements" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use this section only if the notebook is not final.\n", - "\n", - "Notable TODOs:\n", - "\n", - "- Todo 1;\n", - "- Todo 2;\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Describe and comment the most important results." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Suggested next steps" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "State suggested next steps, based on results obtained in this notebook." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Setup" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Library import" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We import all the required PYthon libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Data manipulation\n", - "import os\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "import numpy as np\n", - "\n", - "# Visualization\n", - "import plotly\n", - "import matplotlib as plt\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data import" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# We retrieve all the data required for the analysis.\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Parameter definition" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# We set all relevant parameters for our notebook. (agrrements in naming convention).\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data processing" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Put here the core of the notebook. Feel free to further split this section into subsections.\n", - "\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## References" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Report here relevant references:\n", - "\n", - "1. author1, article1, journal1, year1, url1\n", - "2. author2, article2, journal2, year2, url2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.1" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/data/pyproject.toml b/data/pyproject.toml new file mode 100644 index 000000000..e10a202cf --- /dev/null +++ b/data/pyproject.toml @@ -0,0 +1,10 @@ +[tool.black] +line-length = 100 + +[tool.isort] +profile = "black" + +[tool.ruff] +select = ["E", "F", "N"] +line-length = 100 +ignore = []