diff --git a/data/.dockerignore b/data/.dockerignore
deleted file mode 100644
index 999402d10..000000000
--- a/data/.dockerignore
+++ /dev/null
@@ -1,9 +0,0 @@
-.gitignore
-.github
-.editorconfig
-.env.example
-.pre-commit-config.yaml
-data
-tests
-notebooks
-docs
diff --git a/data/.pre-commit-config.yaml b/data/.pre-commit-config.yaml
index 545404f46..8aa55e294 100644
--- a/data/.pre-commit-config.yaml
+++ b/data/.pre-commit-config.yaml
@@ -1,6 +1,35 @@
repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v2.3.0
+ hooks:
+ - id: check-yaml
+ - id: end-of-file-fixer
+ - id: trailing-whitespace
+
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.0.275
+ hooks:
+ - id: ruff
+ args: [ "--line-length=100", "--select=E,F,N"]
+
- repo: https://github.com/psf/black
- rev: 23.1.0
+ rev: 23.3.0
hooks:
- id: black
- args: ["--line-length=120"]
+ args: [ "--line-length=100" ]
+
+ - repo: https://github.com/nbQA-dev/nbQA
+ rev: 1.7.0
+ hooks:
+ - id: nbqa-black
+ args: [ "--line-length=100"]
+ - id: nbqa-isort
+ args: [ "--float-to-top", "--profile=black"]
+ - id: nbqa-ruff
+ args: [ "--line-length=100" , "--select=E,F,N", "--fix"]
+
+ # check for private keys and passwords!
+ - repo: https://github.com/gitleaks/gitleaks
+ rev: v8.17.0
+ hooks:
+ - id: gitleaks-docker
diff --git a/data/Dockerfile b/data/importers/Dockerfile
similarity index 87%
rename from data/Dockerfile
rename to data/importers/Dockerfile
index 029c0e46c..639608660 100644
--- a/data/Dockerfile
+++ b/data/importers/Dockerfile
@@ -21,7 +21,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2
RUN pip install -q --upgrade --no-cache-dir pip
-COPY ./requirements.txt requirements.txt
+COPY requirements.txt requirements.txt
RUN pip install -q --no-cache-dir -r requirements.txt
WORKDIR /
@@ -30,7 +30,7 @@ RUN mkdir -p data/
COPY ./base_data_importer/ /base_data_importer
COPY ./data_download/ /data_download
COPY h3_data_importer /h3_data_importer
-COPY indicator_coefficient_importer/ /indicator_coefficient_importer
-COPY ./Makefile ./Makefile
+COPY indicator_coefficient_importer /indicator_coefficient_importer
+COPY Makefile ./Makefile
ENTRYPOINT ["/usr/bin/make"]
diff --git a/data/Makefile b/data/importers/Makefile
similarity index 100%
rename from data/Makefile
rename to data/importers/Makefile
diff --git a/data/h3_data_importer/__init__.py b/data/importers/__init__.py
similarity index 100%
rename from data/h3_data_importer/__init__.py
rename to data/importers/__init__.py
diff --git a/data/base_data_importer/Makefile b/data/importers/base_data_importer/Makefile
similarity index 100%
rename from data/base_data_importer/Makefile
rename to data/importers/base_data_importer/Makefile
diff --git a/data/base_data_importer/README.md b/data/importers/base_data_importer/README.md
similarity index 100%
rename from data/base_data_importer/README.md
rename to data/importers/base_data_importer/README.md
diff --git a/data/base_data_importer/csv_to_table.py b/data/importers/base_data_importer/csv_to_table.py
similarity index 100%
rename from data/base_data_importer/csv_to_table.py
rename to data/importers/base_data_importer/csv_to_table.py
diff --git a/data/base_data_importer/data/1.units.csv b/data/importers/base_data_importer/data/1.units.csv
similarity index 100%
rename from data/base_data_importer/data/1.units.csv
rename to data/importers/base_data_importer/data/1.units.csv
diff --git a/data/base_data_importer/data/2.indicator.csv b/data/importers/base_data_importer/data/2.indicator.csv
similarity index 100%
rename from data/base_data_importer/data/2.indicator.csv
rename to data/importers/base_data_importer/data/2.indicator.csv
diff --git a/data/base_data_importer/data/3.unit_conversion.csv b/data/importers/base_data_importer/data/3.unit_conversion.csv
similarity index 100%
rename from data/base_data_importer/data/3.unit_conversion.csv
rename to data/importers/base_data_importer/data/3.unit_conversion.csv
diff --git a/data/base_data_importer/data/4.material.csv b/data/importers/base_data_importer/data/4.material.csv
similarity index 100%
rename from data/base_data_importer/data/4.material.csv
rename to data/importers/base_data_importer/data/4.material.csv
diff --git a/data/data.sh b/data/importers/data.sh
similarity index 100%
rename from data/data.sh
rename to data/importers/data.sh
diff --git a/data/data_download/Makefile b/data/importers/data_download/Makefile
similarity index 100%
rename from data/data_download/Makefile
rename to data/importers/data_download/Makefile
diff --git a/data/data_download/countriesregions.csv b/data/importers/data_download/countriesregions.csv
similarity index 100%
rename from data/data_download/countriesregions.csv
rename to data/importers/data_download/countriesregions.csv
diff --git a/data/data_download/populate_admin_regions.sql b/data/importers/data_download/populate_admin_regions.sql
similarity index 100%
rename from data/data_download/populate_admin_regions.sql
rename to data/importers/data_download/populate_admin_regions.sql
diff --git a/data/docker-compose.yml b/data/importers/docker-compose.yml
similarity index 78%
rename from data/docker-compose.yml
rename to data/importers/docker-compose.yml
index 26b89fdef..fe20c09b8 100644
--- a/data/docker-compose.yml
+++ b/data/importers/docker-compose.yml
@@ -2,9 +2,9 @@ version: "3.8"
services:
landgriffon-seed-data:
build:
- context: ./
+ context: ..
dockerfile: Dockerfile
command: seed-data
env_file:
- - '../.env'
+ - '../../.env'
network_mode: "host"
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch3 CFs_Climate change v01.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch3 CFs_Climate change v01.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch3 CFs_Climate change v01.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch3 CFs_Climate change v01.xlsx
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch4_IntakeFractions+CharacterizationFactors v01.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch4_IntakeFractions+CharacterizationFactors v01.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch4_IntakeFractions+CharacterizationFactors v01.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch4_IntakeFractions+CharacterizationFactors v01.xlsx
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE - Watersheds.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE - Watersheds.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE - Watersheds.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE - Watersheds.xlsx
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE_country_regions_world_april2016.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE_country_regions_world_april2016.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE_country_regions_world_april2016.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch5.1_AWARE_country_regions_world_april2016.xlsx
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch5.2_Water-HumanHealthCFlist_2016-12-12 v01.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch5.2_Water-HumanHealthCFlist_2016-12-12 v01.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch5.2_Water-HumanHealthCFlist_2016-12-12 v01.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch5.2_Water-HumanHealthCFlist_2016-12-12 v01.xlsx
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch6 PSLglobal v01.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch6 PSLglobal v01.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch6 PSLglobal v01.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch6 PSLglobal v01.xlsx
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSL_regional_ecoregions_v01.csv b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSL_regional_ecoregions_v01.csv
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSL_regional_ecoregions_v01.csv
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSL_regional_ecoregions_v01.csv
diff --git a/data/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSLregional_v01.xlsx b/data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSLregional_v01.xlsx
similarity index 100%
rename from data/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSLregional_v01.xlsx
rename to data/importers/h3_data_importer/LCIA_UNEP_SETAC/Ch6_PSLregional_v01.xlsx
diff --git a/data/h3_data_importer/Makefile b/data/importers/h3_data_importer/Makefile
similarity index 100%
rename from data/h3_data_importer/Makefile
rename to data/importers/h3_data_importer/Makefile
diff --git a/data/h3_data_importer/README.md b/data/importers/h3_data_importer/README.md
similarity index 100%
rename from data/h3_data_importer/README.md
rename to data/importers/h3_data_importer/README.md
diff --git a/data/indicator_coefficient_importer/__init__.py b/data/importers/h3_data_importer/__init__.py
similarity index 100%
rename from data/indicator_coefficient_importer/__init__.py
rename to data/importers/h3_data_importer/__init__.py
diff --git a/data/h3_data_importer/cog_to_contextual_layer_linker.py b/data/importers/h3_data_importer/cog_to_contextual_layer_linker.py
similarity index 100%
rename from data/h3_data_importer/cog_to_contextual_layer_linker.py
rename to data/importers/h3_data_importer/cog_to_contextual_layer_linker.py
diff --git a/data/h3_data_importer/commodity_mapping/commodity_tif_mapping_v1_20210809 - commodities.csv b/data/importers/h3_data_importer/commodity_mapping/commodity_tif_mapping_v1_20210809 - commodities.csv
similarity index 100%
rename from data/h3_data_importer/commodity_mapping/commodity_tif_mapping_v1_20210809 - commodities.csv
rename to data/importers/h3_data_importer/commodity_mapping/commodity_tif_mapping_v1_20210809 - commodities.csv
diff --git a/data/h3_data_importer/contextual_layers_metadata/bluewater_footprint_metadata.json b/data/importers/h3_data_importer/contextual_layers_metadata/bluewater_footprint_metadata.json
similarity index 100%
rename from data/h3_data_importer/contextual_layers_metadata/bluewater_footprint_metadata.json
rename to data/importers/h3_data_importer/contextual_layers_metadata/bluewater_footprint_metadata.json
diff --git a/data/h3_data_importer/contextual_layers_metadata/contextual_metadata_schema.json b/data/importers/h3_data_importer/contextual_layers_metadata/contextual_metadata_schema.json
similarity index 100%
rename from data/h3_data_importer/contextual_layers_metadata/contextual_metadata_schema.json
rename to data/importers/h3_data_importer/contextual_layers_metadata/contextual_metadata_schema.json
diff --git a/data/h3_data_importer/contextual_layers_metadata/h3_grid_aqueduct_global_metadata.json b/data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_aqueduct_global_metadata.json
similarity index 100%
rename from data/h3_data_importer/contextual_layers_metadata/h3_grid_aqueduct_global_metadata.json
rename to data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_aqueduct_global_metadata.json
diff --git a/data/h3_data_importer/contextual_layers_metadata/h3_grid_bio_global_metadata.json b/data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_bio_global_metadata.json
similarity index 100%
rename from data/h3_data_importer/contextual_layers_metadata/h3_grid_bio_global_metadata.json
rename to data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_bio_global_metadata.json
diff --git a/data/h3_data_importer/contextual_layers_metadata/h3_grid_carbon_global_metadata.json b/data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_carbon_global_metadata.json
similarity index 100%
rename from data/h3_data_importer/contextual_layers_metadata/h3_grid_carbon_global_metadata.json
rename to data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_carbon_global_metadata.json
diff --git a/data/h3_data_importer/contextual_layers_metadata/h3_grid_deforestation_global_metadata.json b/data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_deforestation_global_metadata.json
similarity index 100%
rename from data/h3_data_importer/contextual_layers_metadata/h3_grid_deforestation_global_metadata.json
rename to data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_deforestation_global_metadata.json
diff --git a/data/h3_data_importer/contextual_layers_metadata/h3_grid_hdi_global_metadata.json b/data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_hdi_global_metadata.json
similarity index 100%
rename from data/h3_data_importer/contextual_layers_metadata/h3_grid_hdi_global_metadata.json
rename to data/importers/h3_data_importer/contextual_layers_metadata/h3_grid_hdi_global_metadata.json
diff --git a/data/h3_data_importer/csv_to_h3_table.py b/data/importers/h3_data_importer/csv_to_h3_table.py
similarity index 100%
rename from data/h3_data_importer/csv_to_h3_table.py
rename to data/importers/h3_data_importer/csv_to_h3_table.py
diff --git a/data/h3_data_importer/data_checksums/deforestation b/data/importers/h3_data_importer/data_checksums/deforestation
similarity index 100%
rename from data/h3_data_importer/data_checksums/deforestation
rename to data/importers/h3_data_importer/data_checksums/deforestation
diff --git a/data/h3_data_importer/data_checksums/forestGHG b/data/importers/h3_data_importer/data_checksums/forestGHG
similarity index 100%
rename from data/h3_data_importer/data_checksums/forestGHG
rename to data/importers/h3_data_importer/data_checksums/forestGHG
diff --git a/data/h3_data_importer/data_checksums/mapspam_ha b/data/importers/h3_data_importer/data_checksums/mapspam_ha
similarity index 100%
rename from data/h3_data_importer/data_checksums/mapspam_ha
rename to data/importers/h3_data_importer/data_checksums/mapspam_ha
diff --git a/data/h3_data_importer/data_checksums/mapspam_prod b/data/importers/h3_data_importer/data_checksums/mapspam_prod
similarity index 100%
rename from data/h3_data_importer/data_checksums/mapspam_prod
rename to data/importers/h3_data_importer/data_checksums/mapspam_prod
diff --git a/data/h3_data_importer/raster_folder_to_h3_table.py b/data/importers/h3_data_importer/raster_folder_to_h3_table.py
similarity index 100%
rename from data/h3_data_importer/raster_folder_to_h3_table.py
rename to data/importers/h3_data_importer/raster_folder_to_h3_table.py
diff --git a/data/h3_data_importer/utils.py b/data/importers/h3_data_importer/utils.py
similarity index 100%
rename from data/h3_data_importer/utils.py
rename to data/importers/h3_data_importer/utils.py
diff --git a/data/h3_data_importer/vector_folder_to_h3_table.py b/data/importers/h3_data_importer/vector_folder_to_h3_table.py
similarity index 100%
rename from data/h3_data_importer/vector_folder_to_h3_table.py
rename to data/importers/h3_data_importer/vector_folder_to_h3_table.py
diff --git a/data/indicator_coefficient_importer/Makefile b/data/importers/indicator_coefficient_importer/Makefile
similarity index 100%
rename from data/indicator_coefficient_importer/Makefile
rename to data/importers/indicator_coefficient_importer/Makefile
diff --git a/data/notebooks/Final/.gitkeep b/data/importers/indicator_coefficient_importer/__init__.py
similarity index 100%
rename from data/notebooks/Final/.gitkeep
rename to data/importers/indicator_coefficient_importer/__init__.py
diff --git a/data/indicator_coefficient_importer/data/bwfp_indicator_coefficients.csv b/data/importers/indicator_coefficient_importer/data/bwfp_indicator_coefficients.csv
similarity index 100%
rename from data/indicator_coefficient_importer/data/bwfp_indicator_coefficients.csv
rename to data/importers/indicator_coefficient_importer/data/bwfp_indicator_coefficients.csv
diff --git a/data/indicator_coefficient_importer/indicator_coefficient_importer.py b/data/importers/indicator_coefficient_importer/indicator_coefficient_importer.py
similarity index 100%
rename from data/indicator_coefficient_importer/indicator_coefficient_importer.py
rename to data/importers/indicator_coefficient_importer/indicator_coefficient_importer.py
diff --git a/data/requirements.txt b/data/importers/requirements.txt
similarity index 100%
rename from data/requirements.txt
rename to data/importers/requirements.txt
diff --git a/data/notebooks/Lab/0_1_Crop_data_exploration_and_upscaling.ipynb b/data/notebooks/Lab/0_1_Crop_data_exploration_and_upscaling.ipynb
index adb972ea6..dae84cad6 100644
--- a/data/notebooks/Lab/0_1_Crop_data_exploration_and_upscaling.ipynb
+++ b/data/notebooks/Lab/0_1_Crop_data_exploration_and_upscaling.ipynb
@@ -15,17 +15,19 @@
}
],
"source": [
- "import pandas as pd\n",
- "import geopandas as gpd\n",
"import csv\n",
- "import requests\n",
- "import zipfile\n",
- "import os\n",
"import io\n",
- "import seaborn as sns\n",
+ "import os\n",
+ "import zipfile\n",
+ "\n",
+ "import geopandas as gpd\n",
"import matplotlib.pyplot as plt\n",
+ "import pandas as pd\n",
"import rasterio as rio\n",
"import rasterio.plot\n",
+ "import requests\n",
+ "import seaborn as sns\n",
+ "\n",
"%matplotlib inline"
]
},
@@ -189,7 +191,7 @@
}
],
"source": [
- "processed_data = gpd.read_file('../../datasets/processed/user_data/located_lg_data_point_v2.shp')\n",
+ "processed_data = gpd.read_file(\"../../datasets/processed/user_data/located_lg_data_point_v2.shp\")\n",
"processed_data.head()"
]
},
@@ -210,7 +212,6 @@
}
],
"source": [
- "\n",
"sns.set_style(style=\"darkgrid\")\n",
"ax = sns.countplot(x=\"Material\", data=processed_data)"
]
@@ -236,7 +237,7 @@
}
],
"source": [
- "processed_data.groupby('Material')['Volume'].sum()"
+ "processed_data.groupby(\"Material\")[\"Volume\"].sum()"
]
},
{
@@ -285,12 +286,12 @@
"with requests.Session() as s:\n",
" download = s.get(url)\n",
"\n",
- " decoded_content = download.content.decode('utf-8')\n",
+ " decoded_content = download.content.decode(\"utf-8\")\n",
"\n",
- " data = csv.reader(decoded_content.splitlines(), delimiter=',')\n",
+ " data = csv.reader(decoded_content.splitlines(), delimiter=\",\")\n",
" my_list = list(data)\n",
- " \n",
- " \n",
+ "\n",
+ "\n",
"FAO_df = pd.DataFrame(my_list, columns=my_list[0])"
]
},
@@ -470,9 +471,9 @@
"with requests.Session() as s:\n",
" download = s.get(url)\n",
"\n",
- " decoded_content = download.content.decode('utf-8')\n",
+ " decoded_content = download.content.decode(\"utf-8\")\n",
"\n",
- " data = csv.reader(decoded_content.splitlines(), delimiter=',')\n",
+ " data = csv.reader(decoded_content.splitlines(), delimiter=\",\")\n",
" my_list = list(data)\n",
"FAO_indonesia_df = pd.DataFrame(my_list, columns=my_list[0])"
]
@@ -962,15 +963,15 @@
"# http://www.earthstat.org/harvested-area-yield-175-crops/\n",
"url = \"https://s3.us-east-2.amazonaws.com/earthstatdata/HarvestedAreaYield175Crops_Indvidual_Geotiff/cotton_HarvAreaYield_Geotiff.zip\"\n",
"\n",
- "local_path = '../raw_data/cotton_earthstat'\n",
+ "local_path = \"../raw_data/cotton_earthstat\"\n",
"if not os.path.isdir(local_path):\n",
" os.mkdir(local_path)\n",
"\n",
- " print('Downloading shapefile...')\n",
+ " print(\"Downloading shapefile...\")\n",
" r = requests.get(url)\n",
" z = zipfile.ZipFile(io.BytesIO(r.content))\n",
" print(\"Done\")\n",
- " z.extractall(path=local_path) # extract to folder\n",
+ " z.extractall(path=local_path) # extract to folder\n",
" print(\"Data extracted!\")"
]
},
@@ -994,12 +995,13 @@
],
"source": [
"# Use rasterio to import the reprojected data as img\n",
- "out_path = '../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif'\n",
+ "out_path = (\n",
+ " \"../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif\"\n",
+ ")\n",
"with rio.open(out_path) as src:\n",
" arr = src.read(out_shape=(src.height, src.width))\n",
"\n",
"\n",
- "\n",
"plt.imshow(arr[0])\n",
"plt.show()"
]
@@ -1022,15 +1024,15 @@
"source": [
"# get country data from\n",
"url = \"https://biogeo.ucdavis.edu/data/gadm3.6/shp/gadm36_IDN_shp.zip\"\n",
- "local_path = '../raw_data/gadm_indonesia'\n",
+ "local_path = \"../raw_data/gadm_indonesia\"\n",
"if not os.path.isdir(local_path):\n",
" os.mkdir(local_path)\n",
"\n",
- " print('Downloading shapefile...')\n",
+ " print(\"Downloading shapefile...\")\n",
" r = requests.get(url)\n",
" z = zipfile.ZipFile(io.BytesIO(r.content))\n",
" print(\"Done\")\n",
- " z.extractall(path=local_path) # extract to folder\n",
+ " z.extractall(path=local_path) # extract to folder\n",
" print(\"Data extracted!\")"
]
},
@@ -1108,7 +1110,7 @@
}
],
"source": [
- "gadm_ind = gpd.read_file('../raw_data/gadm_indonesia/gadm36_IDN_0.shp')\n",
+ "gadm_ind = gpd.read_file(\"../raw_data/gadm_indonesia/gadm36_IDN_0.shp\")\n",
"gadm_ind.head()"
]
},
@@ -1173,14 +1175,16 @@
}
],
"source": [
- "with rio.open('../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif') as src:\n",
+ "with rio.open(\n",
+ " \"../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif\"\n",
+ ") as src:\n",
" dat = src.read(1)\n",
- " fig, ax = plt.subplots(figsize=[10,5])\n",
- " ax.set_ylim((-10,8))\n",
- " ax.set_xlim((90,145))\n",
- " rio.plot.show(dat, vmax=100, cmap='BrBG', ax=ax, transform=src.transform)\n",
- " gadm_ind.plot(ax=ax, color='', edgecolor='yellow')\n",
- " ax.set_title('Cotton production in Indonesia (green: higher production)')"
+ " fig, ax = plt.subplots(figsize=[10, 5])\n",
+ " ax.set_ylim((-10, 8))\n",
+ " ax.set_xlim((90, 145))\n",
+ " rio.plot.show(dat, vmax=100, cmap=\"BrBG\", ax=ax, transform=src.transform)\n",
+ " gadm_ind.plot(ax=ax, color=\"\", edgecolor=\"yellow\")\n",
+ " ax.set_title(\"Cotton production in Indonesia (green: higher production)\")"
]
},
{
@@ -1198,7 +1202,7 @@
}
],
"source": [
- "feature = RetrieveBoundaries(query='India')"
+ "feature = RetrieveBoundaries(query=\"India\")"
]
},
{
@@ -1263,8 +1267,7 @@
}
],
"source": [
- "\n",
- "gdf = gpd.GeoDataFrame.from_features(feature.feature_json, crs='epsg:4326')\n",
+ "gdf = gpd.GeoDataFrame.from_features(feature.feature_json, crs=\"epsg:4326\")\n",
"gdf"
]
},
@@ -1327,14 +1330,16 @@
}
],
"source": [
- "with rio.open('../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif') as src:\n",
+ "with rio.open(\n",
+ " \"../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif\"\n",
+ ") as src:\n",
" dat = src.read(1)\n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
- " ax.set_ylim((-5,40))\n",
- " ax.set_xlim((60,100))\n",
- " rio.plot.show(dat, vmax=100, cmap='BrBG', ax=ax, transform=src.transform)\n",
- " gdf.plot(ax=ax, color='', edgecolor='yellow')\n",
- " ax.set_title('Cotton production in India (green: higher production)')"
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
+ " ax.set_ylim((-5, 40))\n",
+ " ax.set_xlim((60, 100))\n",
+ " rio.plot.show(dat, vmax=100, cmap=\"BrBG\", ax=ax, transform=src.transform)\n",
+ " gdf.plot(ax=ax, color=\"\", edgecolor=\"yellow\")\n",
+ " ax.set_title(\"Cotton production in India (green: higher production)\")"
]
},
{
@@ -1372,8 +1377,8 @@
],
"source": [
"## get upscaling factor for indonesia\n",
- "upscaling_factor = float(FAO_indonesia_df.iloc[1].Value)/float(FAO_indonesia_df.iloc[0].Value)\n",
- "print(f'The upscaling factor is: {upscaling_factor}')"
+ "upscaling_factor = float(FAO_indonesia_df.iloc[1].Value) / float(FAO_indonesia_df.iloc[0].Value)\n",
+ "print(f\"The upscaling factor is: {upscaling_factor}\")"
]
},
{
@@ -1456,7 +1461,7 @@
],
"source": [
"## ad upscaling factor to admin area\n",
- "gadm_ind['scl_factor']=upscaling_factor\n",
+ "gadm_ind[\"scl_factor\"] = upscaling_factor\n",
"gadm_ind"
]
},
@@ -1482,8 +1487,10 @@
}
],
"source": [
- "#generate a raster mask with value 1 for the harvest area tiff\n",
- "with rio.open('../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif') as src:\n",
+ "# generate a raster mask with value 1 for the harvest area tiff\n",
+ "with rio.open(\n",
+ " \"../raw_data/cotton_earthstat/cotton_HarvAreaYield_Geotiff/cotton_HarvestedAreaHectares.tif\"\n",
+ ") as src:\n",
" print(src.profile)"
]
},
@@ -1519,8 +1526,8 @@
}
],
"source": [
- "#check the mask\n",
- "with rio.open('../raw_data/harvest_area_scale_factor_mask_v2.tif') as src:\n",
+ "# check the mask\n",
+ "with rio.open(\"../raw_data/harvest_area_scale_factor_mask_v2.tif\") as src:\n",
" print(src.profile)"
]
},
@@ -1544,12 +1551,11 @@
],
"source": [
"# Use rasterio to import the reprojected data as img\n",
- "out_path = '../raw_data/harvest_area_scale_factor_mask_v2.tif'\n",
+ "out_path = \"../raw_data/harvest_area_scale_factor_mask_v2.tif\"\n",
"with rio.open(out_path) as src:\n",
" arr = src.read(out_shape=(src.height, src.width))\n",
"\n",
"\n",
- "\n",
"plt.imshow(arr[0])\n",
"plt.show()"
]
@@ -1612,16 +1618,14 @@
}
],
"source": [
- "\n",
- "\n",
- "with rio.open('../Processed_data/cotton_2001_harvest_area.tif') as src:\n",
+ "with rio.open(\"../Processed_data/cotton_2001_harvest_area.tif\") as src:\n",
" dat = src.read(1)\n",
- " fig, ax = plt.subplots(figsize=[10,5])\n",
- " ax.set_ylim((-10,8))\n",
- " ax.set_xlim((90,145))\n",
- " rio.plot.show(dat, vmax=100, cmap='BrBG', ax=ax, transform=src.transform)\n",
- " gadm_ind.plot(ax=ax, color='', edgecolor='yellow')\n",
- " ax.set_title('Cotton production in Indonesia 2001 (green: higher production)')"
+ " fig, ax = plt.subplots(figsize=[10, 5])\n",
+ " ax.set_ylim((-10, 8))\n",
+ " ax.set_xlim((90, 145))\n",
+ " rio.plot.show(dat, vmax=100, cmap=\"BrBG\", ax=ax, transform=src.transform)\n",
+ " gadm_ind.plot(ax=ax, color=\"\", edgecolor=\"yellow\")\n",
+ " ax.set_title(\"Cotton production in Indonesia 2001 (green: higher production)\")"
]
},
{
@@ -1666,17 +1670,17 @@
"metadata": {},
"outputs": [],
"source": [
- "#download data from fao\n",
- "#download country specific yield for cotton from 2000 to 2019\n",
+ "# download data from fao\n",
+ "# download country specific yield for cotton from 2000 to 2019\n",
"\n",
"url = \"http://fenixservices.fao.org/faostat/api/v1/en/data/QC?area=5000%3E&area_cs=FAO&element=2413&item=328&item_cs=FAO&year=2000%2C2001%2C2002%2C2003%2C2004%2C2005%2C2006%2C2007%2C2008%2C2009%2C2010%2C2011%2C2012%2C2013%2C2014%2C2015%2C2016%2C2017%2C2018%2C2019&show_codes=true&show_unit=true&show_flags=true&null_values=false&output_type=csv\"\n",
"\n",
"with requests.Session() as s:\n",
" download = s.get(url)\n",
"\n",
- " decoded_content = download.content.decode('utf-8')\n",
+ " decoded_content = download.content.decode(\"utf-8\")\n",
"\n",
- " data = csv.reader(decoded_content.splitlines(), delimiter=',')\n",
+ " data = csv.reader(decoded_content.splitlines(), delimiter=\",\")\n",
" my_list = list(data)\n",
"FAO_df_2000_2019 = pd.DataFrame(my_list, columns=my_list[0])"
]
@@ -1852,20 +1856,23 @@
"metadata": {},
"outputs": [],
"source": [
- "#clean dataframe to get just one year\n",
+ "# clean dataframe to get just one year\n",
"\n",
- "unique_countries = list(FAO_df_2000_2019['Area'].unique())\n",
- "unique_years = list(FAO_df_2000_2019['Year'].unique())\n",
+ "unique_countries = list(FAO_df_2000_2019[\"Area\"].unique())\n",
+ "unique_years = list(FAO_df_2000_2019[\"Year\"].unique())\n",
"list_values = {}\n",
"for country in unique_countries:\n",
" list_values[country] = {}\n",
" for year in unique_years:\n",
" try:\n",
- " value = float(FAO_df_2000_2019[(FAO_df_2000_2019['Area']==country) & (FAO_df_2000_2019['Year']==year)].iloc[0]['Value'])\n",
+ " value = float(\n",
+ " FAO_df_2000_2019[\n",
+ " (FAO_df_2000_2019[\"Area\"] == country) & (FAO_df_2000_2019[\"Year\"] == year)\n",
+ " ].iloc[0][\"Value\"]\n",
+ " )\n",
" except:\n",
- " value = 0 \n",
- " list_values[country][year] = value\n",
- " "
+ " value = 0\n",
+ " list_values[country][year] = value"
]
},
{
@@ -2280,8 +2287,8 @@
}
],
"source": [
- "#calculate percentage change \n",
- "fao_df_pf = fao_df_pf.pct_change(axis='columns')\n",
+ "# calculate percentage change\n",
+ "fao_df_pf = fao_df_pf.pct_change(axis=\"columns\")\n",
"fao_df_pf.head()"
]
},
@@ -2498,8 +2505,8 @@
}
],
"source": [
- "fao_df_pf['mean'] = fao_df_pf.mean(axis=1)\n",
- "fao_df_pf['median'] = fao_df_pf.median(axis=1)\n",
+ "fao_df_pf[\"mean\"] = fao_df_pf.mean(axis=1)\n",
+ "fao_df_pf[\"median\"] = fao_df_pf.median(axis=1)\n",
"fao_df_pf.head()"
]
},
@@ -2838,6 +2845,7 @@
],
"source": [
"import pandas_bokeh\n",
+ "\n",
"pandas_bokeh.output_notebook()"
]
},
@@ -2930,7 +2938,7 @@
}
],
"source": [
- "fao_df_pf.plot_bokeh(kind='bar') "
+ "fao_df_pf.plot_bokeh(kind=\"bar\")"
]
},
{
@@ -3022,7 +3030,7 @@
}
],
"source": [
- "fao_df_pf[['mean','median']].plot_bokeh(kind='bar') "
+ "fao_df_pf[[\"mean\", \"median\"]].plot_bokeh(kind=\"bar\")"
]
},
{
@@ -3114,7 +3122,7 @@
}
],
"source": [
- "fao_df_pf_transpose.plot_bokeh(kind='line')"
+ "fao_df_pf_transpose.plot_bokeh(kind=\"line\")"
]
},
{
@@ -3206,7 +3214,7 @@
}
],
"source": [
- "fao_df_pf_transpose.loc[['mean', 'median']].plot_bokeh(kind='bar')"
+ "fao_df_pf_transpose.loc[[\"mean\", \"median\"]].plot_bokeh(kind=\"bar\")"
]
},
{
@@ -3298,7 +3306,9 @@
}
],
"source": [
- "fao_df_pf_transpose.loc[['mean', 'median']][['Afghanistan', 'Albania', 'Algeria', 'Angola']].plot_bokeh(kind='bar')"
+ "fao_df_pf_transpose.loc[[\"mean\", \"median\"]][\n",
+ " [\"Afghanistan\", \"Albania\", \"Algeria\", \"Angola\"]\n",
+ "].plot_bokeh(kind=\"bar\")"
]
},
{
@@ -3390,7 +3400,7 @@
}
],
"source": [
- "fao_df_pf.transpose()[['Afghanistan', 'Albania', 'Algeria', 'Angola']].plot_bokeh(kind='line')"
+ "fao_df_pf.transpose()[[\"Afghanistan\", \"Albania\", \"Algeria\", \"Angola\"]].plot_bokeh(kind=\"line\")"
]
},
{
diff --git a/data/notebooks/Lab/0_2_Manage_unknonw_locations.ipynb b/data/notebooks/Lab/0_2_Manage_unknonw_locations.ipynb
index 1fdb14008..d53b31cbf 100644
--- a/data/notebooks/Lab/0_2_Manage_unknonw_locations.ipynb
+++ b/data/notebooks/Lab/0_2_Manage_unknonw_locations.ipynb
@@ -52,12 +52,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd\n",
+ "import folium\n",
"import geopandas as gpd\n",
- "import os\n",
+ "import pandas as pd\n",
"from IPython.display import Image\n",
- "import requests\n",
- "import folium\n",
"from processing.geolocating_data import GeolocateAddress"
]
},
@@ -189,7 +187,7 @@
}
],
"source": [
- "input_data = pd.read_csv('../raw_data/LG fake data sheet - Raw materials.csv')\n",
+ "input_data = pd.read_csv(\"../raw_data/LG fake data sheet - Raw materials.csv\")\n",
"input_data.head()"
]
},
@@ -313,7 +311,7 @@
],
"source": [
"## get unknown locations\n",
- "unknown_data = input_data[input_data['Location type'] =='Unknown']\n",
+ "unknown_data = input_data[input_data[\"Location type\"] == \"Unknown\"]\n",
"unknown_data.head()"
]
},
@@ -413,11 +411,11 @@
}
],
"source": [
- "for i in range(0,len(unknown_data_china)):\n",
+ "for i in range(0, len(unknown_data_china)):\n",
" row = unknown_data_china.iloc[0]\n",
- " if row['Location type'] == 'Unknown':\n",
- " country = row['Country']\n",
- " commodity = row['Material']\n",
+ " if row[\"Location type\"] == \"Unknown\":\n",
+ " country = row[\"Country\"]\n",
+ " commodity = row[\"Material\"]\n",
" print(country, commodity)"
]
},
@@ -446,7 +444,7 @@
],
"source": [
"# image showing imports of rubber to chinna\n",
- "Image(filename = \"../raw_data/Screenshot from 2021-04-30 11-29-40.png\", width = 900, height = 300)"
+ "Image(filename=\"../raw_data/Screenshot from 2021-04-30 11-29-40.png\", width=900, height=300)"
]
},
{
@@ -676,9 +674,11 @@
"## category 17 is rubber\n",
"## TODO - get list of importers and categories\n",
"\n",
- "url = 'https://api.resourcetrade.earth/api/rt/2.3/downloads?year=2019&importer=156&category=17&units=value&autozoom=1'\n",
+ "url = \"https://api.resourcetrade.earth/api/rt/2.3/downloads?year=2019&importer=156&category=17&units=value&autozoom=1\"\n",
"\n",
- "df_imports = pd.read_excel (r'../raw_data/resourcetradeearth-all-156-17-2019.xlsx', sheet_name='Exporters')\n",
+ "df_imports = pd.read_excel(\n",
+ " r\"../raw_data/resourcetradeearth-all-156-17-2019.xlsx\", sheet_name=\"Exporters\"\n",
+ ")\n",
"df_imports"
]
},
@@ -895,8 +895,8 @@
}
],
"source": [
- "#do the analysis just for one year - year 2019\n",
- "df_imports_2019 = df_imports[df_imports['Year']==2019]\n",
+ "# do the analysis just for one year - year 2019\n",
+ "df_imports_2019 = df_imports[df_imports[\"Year\"] == 2019]\n",
"df_imports_2019"
]
},
@@ -915,7 +915,7 @@
}
],
"source": [
- "print(f'There are {len(df_imports_2019)} exporters of rubber to chinna')"
+ "print(f\"There are {len(df_imports_2019)} exporters of rubber to chinna\")"
]
},
{
@@ -1535,20 +1535,20 @@
}
],
"source": [
- "#retrieve the geometries for each country \n",
+ "# retrieve the geometries for each country\n",
"geometry_list = []\n",
"for i in range(0, len(df_imports_2019)):\n",
" row = df_imports_2019.iloc[i]\n",
- " country = row['Exporter']\n",
+ " country = row[\"Exporter\"]\n",
" try:\n",
" geolocation = GeolocateAddress(query=country)\n",
- " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs='epsg:4326')\n",
- " geom = gdf['geometry'].iloc[0]\n",
+ " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs=\"epsg:4326\")\n",
+ " geom = gdf[\"geometry\"].iloc[0]\n",
" except:\n",
- " print(f'Geolocation for the location {country} has failed!')\n",
- " geom = 'None'\n",
- " \n",
- " geometry_list.append(geom)\n"
+ " print(f\"Geolocation for the location {country} has failed!\")\n",
+ " geom = \"None\"\n",
+ "\n",
+ " geometry_list.append(geom)"
]
},
{
@@ -1695,8 +1695,8 @@
}
],
"source": [
- "#append geometry to gdf\n",
- "df_imports_2019['Geometry'] = geometry_list\n",
+ "# append geometry to gdf\n",
+ "df_imports_2019[\"Geometry\"] = geometry_list\n",
"df_imports_2019.head()"
]
},
@@ -1783,7 +1783,7 @@
}
],
"source": [
- "df_imports_2019[df_imports_2019['Geometry']=='None']"
+ "df_imports_2019[df_imports_2019[\"Geometry\"] == \"None\"]"
]
},
{
@@ -1918,8 +1918,8 @@
}
],
"source": [
- "#remove no valid geoms (the none)\n",
- "df_imports_2019 = df_imports_2019[df_imports_2019['Geometry']!='None']\n",
+ "# remove no valid geoms (the none)\n",
+ "df_imports_2019 = df_imports_2019[df_imports_2019[\"Geometry\"] != \"None\"]\n",
"df_imports_2019.head()"
]
},
@@ -1930,8 +1930,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#set geometry to gdf\n",
- "df_imports_2019 = df_imports_2019.set_geometry('Geometry')"
+ "# set geometry to gdf\n",
+ "df_imports_2019 = df_imports_2019.set_geometry(\"Geometry\")"
]
},
{
@@ -1964,7 +1964,6 @@
}
],
"source": [
- "\n",
"df_imports_2019.plot()"
]
},
@@ -1975,7 +1974,17 @@
"metadata": {},
"outputs": [],
"source": [
- "df_imports_2019 = df_imports_2019[['Exporter ISO3', 'Exporter','Resource', 'Year', 'Value (1000USD)', 'Weight (1000kg)', 'Geometry' ]]"
+ "df_imports_2019 = df_imports_2019[\n",
+ " [\n",
+ " \"Exporter ISO3\",\n",
+ " \"Exporter\",\n",
+ " \"Resource\",\n",
+ " \"Year\",\n",
+ " \"Value (1000USD)\",\n",
+ " \"Weight (1000kg)\",\n",
+ " \"Geometry\",\n",
+ " ]\n",
+ "]"
]
},
{
@@ -1996,17 +2005,17 @@
}
],
"source": [
- "#split geolocated data by polygon and points for saving\n",
- "gdf_polygon = df_imports_2019[df_imports_2019['Geometry'].apply(lambda x : x.type!='Point' )]\n",
- "gdf_point = df_imports_2019[df_imports_2019['Geometry'].apply(lambda x : x.type=='Point' )]\n",
+ "# split geolocated data by polygon and points for saving\n",
+ "gdf_polygon = df_imports_2019[df_imports_2019[\"Geometry\"].apply(lambda x: x.type != \"Point\")]\n",
+ "gdf_point = df_imports_2019[df_imports_2019[\"Geometry\"].apply(lambda x: x.type == \"Point\")]\n",
"\n",
"gdf_polygon.to_file(\n",
- " '../Processed_data/china_2019_rubber_imports_polygon.shp',\n",
- " driver='ESRI Shapefile',\n",
+ " \"../Processed_data/china_2019_rubber_imports_polygon.shp\",\n",
+ " driver=\"ESRI Shapefile\",\n",
")\n",
"gdf_point.to_file(\n",
- " '../Processed_data/china_2019_rubber_imports_point.shp',\n",
- " driver='ESRI Shapefile',\n",
+ " \"../Processed_data/china_2019_rubber_imports_point.shp\",\n",
+ " driver=\"ESRI Shapefile\",\n",
")"
]
},
@@ -2017,11 +2026,7 @@
"metadata": {},
"outputs": [],
"source": [
- "gdf_polygon.sort_values(\n",
- " 'Value (1000USD)',\n",
- " ascending=False,\n",
- " inplace = True\n",
- ")"
+ "gdf_polygon.sort_values(\"Value (1000USD)\", ascending=False, inplace=True)"
]
},
{
@@ -2186,9 +2191,9 @@
}
],
"source": [
- "top_exporters = top_exporters.set_crs('epsg:4326')\n",
- "m = folium.Map(location=[43.062776, -75.420884],tiles=\"cartodbpositron\", zoom_start=7)\n",
- "folium.GeoJson(data=top_exporters[\"Geometry\"]).add_to(m) \n",
+ "top_exporters = top_exporters.set_crs(\"epsg:4326\")\n",
+ "m = folium.Map(location=[43.062776, -75.420884], tiles=\"cartodbpositron\", zoom_start=7)\n",
+ "folium.GeoJson(data=top_exporters[\"Geometry\"]).add_to(m)\n",
"m"
]
},
@@ -2231,10 +2236,9 @@
}
],
"source": [
- "Country_production_perct = (Export_quantity*100)/production\n",
- "print(f'The country production is: {Country_production_perct} %')\n",
- "print(f'The country import is: {100-Country_production_perct}%')\n",
- " "
+ "Country_production_perct = (Export_quantity * 100) / production\n",
+ "print(f\"The country production is: {Country_production_perct} %\")\n",
+ "print(f\"The country import is: {100-Country_production_perct}%\")"
]
},
{
@@ -2264,14 +2268,14 @@
],
"source": [
"# total value in china for 2019 is 11377881.6251786\n",
- "total_value_traders = sum(list(top_exporters['Value (1000USD)']))\n",
- "print(f'The total value is: {total_value_traders}')\n",
+ "total_value_traders = sum(list(top_exporters[\"Value (1000USD)\"]))\n",
+ "print(f\"The total value is: {total_value_traders}\")\n",
"\n",
"weight_list = []\n",
"for i in range(0, len(top_exporters)):\n",
" row = top_exporters.iloc[i]\n",
- " value = row['Value (1000USD)']\n",
- " weight_value = value/ total_value_traders\n",
+ " value = row[\"Value (1000USD)\"]\n",
+ " weight_value = value / total_value_traders\n",
" weight_list.append(weight_value)"
]
},
@@ -2293,7 +2297,7 @@
}
],
"source": [
- "#check that total weight is 1\n",
+ "# check that total weight is 1\n",
"sum(weight_list)"
]
},
@@ -2317,7 +2321,7 @@
}
],
"source": [
- "top_exporters['Weight_value'] = weight_list"
+ "top_exporters[\"Weight_value\"] = weight_list"
]
},
{
@@ -2494,8 +2498,8 @@
],
"source": [
"top_exporters.to_file(\n",
- " '../Processed_data/china_2019_rubber_imports_topExprters.shp',\n",
- " driver='ESRI Shapefile',\n",
+ " \"../Processed_data/china_2019_rubber_imports_topExprters.shp\",\n",
+ " driver=\"ESRI Shapefile\",\n",
")"
]
},
diff --git a/data/notebooks/Lab/0_3_H3_Data_Exploration.ipynb b/data/notebooks/Lab/0_3_H3_Data_Exploration.ipynb
index 4ab2bddc4..7b62b1907 100644
--- a/data/notebooks/Lab/0_3_H3_Data_Exploration.ipynb
+++ b/data/notebooks/Lab/0_3_H3_Data_Exploration.ipynb
@@ -39,7 +39,7 @@
],
"source": [
"# insert code here\n",
- "!pip install h3\n"
+ "!pip install h3"
]
},
{
@@ -55,22 +55,16 @@
"metadata": {},
"outputs": [],
"source": [
- "#import library\n",
+ "# import library\n",
"\n",
"import json\n",
- "import pandas as pd\n",
- "from pandas.io.json import json_normalize\n",
- "import numpy as np\n",
"\n",
- "import statistics\n",
- "import statsmodels as sm\n",
- "import statsmodels.formula.api as sm_formula\n",
- "from scipy import stats\n",
+ "import pandas as pd\n",
"\n",
- "#import tensorflow as tf\n",
- "#from tensorflow.keras import layers, models\n",
+ "# import tensorflow as tf\n",
+ "# from tensorflow.keras import layers, models\n",
"#\n",
- "#print(tf.__version__)"
+ "# print(tf.__version__)"
]
},
{
@@ -80,7 +74,8 @@
"outputs": [],
"source": [
"import warnings\n",
- "warnings.filterwarnings('ignore')"
+ "\n",
+ "warnings.filterwarnings(\"ignore\")"
]
},
{
@@ -89,16 +84,14 @@
"metadata": {},
"outputs": [],
"source": [
- "import h3\n",
- "\n",
"import geopandas as gpd\n",
- "\n",
+ "import h3\n",
+ "from geojson.feature import *\n",
"from shapely import geometry, ops\n",
- "#import libpysal as pys\n",
- "#import esda\n",
- "#import pointpats as pp\n",
"\n",
- "from geojson.feature import *"
+ "# import libpysal as pys\n",
+ "# import esda\n",
+ "# import pointpats as pp"
]
},
{
@@ -107,21 +100,13 @@
"metadata": {},
"outputs": [],
"source": [
- "#from annoy import AnnoyIndex\n",
- "\n",
- "import bisect\n",
- "import itertools\n",
- "#from more_itertools import unique_everseen\n",
+ "# from annoy import AnnoyIndex\n",
"\n",
"import math\n",
- "import random\n",
- "import decimal\n",
"from collections import Counter\n",
"\n",
- "from pprint import pprint\n",
- "import copy\n",
"\n",
- "from tqdm import tqdm\n"
+ "# from more_itertools import unique_everseen"
]
},
{
@@ -130,22 +115,12 @@
"metadata": {},
"outputs": [],
"source": [
- "#import pydeck\n",
+ "# import pydeck\n",
"\n",
- "from folium import Map, Marker, GeoJson\n",
- "from folium.plugins import MarkerCluster\n",
- "import branca.colormap as cm\n",
- "from branca.colormap import linear\n",
"import folium\n",
- "\n",
- "import seaborn as sns\n",
- "\n",
- "import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
- "from matplotlib.pyplot import imshow\n",
- "import matplotlib.gridspec as gridspec\n",
- "\n",
- "from PIL import Image as pilim\n",
+ "from folium import GeoJson, Map, Marker\n",
+ "from folium.plugins import MarkerCluster\n",
"\n",
"%matplotlib inline"
]
@@ -406,28 +381,30 @@
"list_hex_area_sqm = []\n",
"\n",
"for i in range(0, max_res + 1):\n",
- " ekm = h3.edge_length(resolution=i, unit='km')\n",
- " em = h3.edge_length(resolution=i, unit='m')\n",
+ " ekm = h3.edge_length(resolution=i, unit=\"km\")\n",
+ " em = h3.edge_length(resolution=i, unit=\"m\")\n",
" list_hex_edge_km.append(round(ekm, 3))\n",
" list_hex_edge_m.append(round(em, 3))\n",
" list_hex_perimeter_km.append(round(6 * ekm, 3))\n",
" list_hex_perimeter_m.append(round(6 * em, 3))\n",
"\n",
- " akm = h3.hex_area(resolution=i, unit='km^2')\n",
- " am = h3.hex_area(resolution=i, unit='m^2')\n",
+ " akm = h3.hex_area(resolution=i, unit=\"km^2\")\n",
+ " am = h3.hex_area(resolution=i, unit=\"m^2\")\n",
" list_hex_area_sqkm.append(round(akm, 3))\n",
" list_hex_area_sqm.append(round(am, 3))\n",
"\n",
- "df_meta = pd.DataFrame({\"edge_length_km\": list_hex_edge_km,\n",
- " \"perimeter_km\": list_hex_perimeter_km,\n",
- " \"area_sqkm\": list_hex_area_sqkm,\n",
- " \"edge_length_m\": list_hex_edge_m,\n",
- " \"perimeter_m\": list_hex_perimeter_m,\n",
- " \"area_sqm\": list_hex_area_sqm\n",
- " })\n",
+ "df_meta = pd.DataFrame(\n",
+ " {\n",
+ " \"edge_length_km\": list_hex_edge_km,\n",
+ " \"perimeter_km\": list_hex_perimeter_km,\n",
+ " \"area_sqkm\": list_hex_area_sqkm,\n",
+ " \"edge_length_m\": list_hex_edge_m,\n",
+ " \"perimeter_m\": list_hex_perimeter_m,\n",
+ " \"area_sqm\": list_hex_area_sqm,\n",
+ " }\n",
+ ")\n",
"\n",
- "df_meta[[\"edge_length_km\", \"perimeter_km\", \"area_sqkm\", \n",
- " \"edge_length_m\", \"perimeter_m\", \"area_sqm\"]]"
+ "df_meta[[\"edge_length_km\", \"perimeter_km\", \"area_sqkm\", \"edge_length_m\", \"perimeter_m\", \"area_sqm\"]]"
]
},
{
@@ -649,36 +626,29 @@
}
],
"source": [
- "lat_centr_point = -0.25 \n",
+ "lat_centr_point = -0.25\n",
"lon_centr_point = 112.43\n",
- "#43.600378, 1.445478\n",
+ "# 43.600378, 1.445478\n",
"list_hex_res = []\n",
"list_hex_res_geom = []\n",
"list_res = range(0, max_res + 1)\n",
"\n",
"for resolution in range(0, max_res + 1):\n",
" # index the point in the H3 hexagon of given index resolution\n",
- " h = h3.geo_to_h3(lat = lat_centr_point,\n",
- " lng = lon_centr_point,\n",
- " resolution = resolution\n",
- " )\n",
+ " h = h3.geo_to_h3(lat=lat_centr_point, lng=lon_centr_point, resolution=resolution)\n",
"\n",
" list_hex_res.append(h)\n",
" # get the geometry of the hexagon and convert to geojson\n",
- " h_geom = {\"type\": \"Polygon\",\n",
- " \"coordinates\": [h3.h3_to_geo_boundary(h = h, geo_json = True)]\n",
- " }\n",
+ " h_geom = {\"type\": \"Polygon\", \"coordinates\": [h3.h3_to_geo_boundary(h=h, geo_json=True)]}\n",
" list_hex_res_geom.append(h_geom)\n",
"\n",
"\n",
- "df_res_point = pd.DataFrame({\"res\": list_res,\n",
- " \"hex_id\": list_hex_res,\n",
- " \"geometry\": list_hex_res_geom\n",
- " })\n",
- "df_res_point[\"hex_id_binary\"] = df_res_point[\"hex_id\"].apply(\n",
- " lambda x: bin(int(x, 16))[2:])\n",
+ "df_res_point = pd.DataFrame(\n",
+ " {\"res\": list_res, \"hex_id\": list_hex_res, \"geometry\": list_hex_res_geom}\n",
+ ")\n",
+ "df_res_point[\"hex_id_binary\"] = df_res_point[\"hex_id\"].apply(lambda x: bin(int(x, 16))[2:])\n",
"\n",
- "pd.set_option('display.max_colwidth', 63)\n",
+ "pd.set_option(\"display.max_colwidth\", 63)\n",
"df_res_point"
]
},
@@ -712,20 +682,21 @@
}
],
"source": [
- "map_example = Map(location = [-0.25 , 112.43],\n",
- " zoom_start = 5.5,\n",
- " tiles = \"cartodbpositron\",\n",
- " attr = '''© \n",
+ "map_example = Map(\n",
+ " location=[-0.25, 112.43],\n",
+ " zoom_start=5.5,\n",
+ " tiles=\"cartodbpositron\",\n",
+ " attr=\"\"\"© \n",
" OpenStreetMapcontributors ©\n",
" \n",
- " CartoDB'''\n",
- " )\n",
+ " CartoDB\"\"\",\n",
+ ")\n",
"\n",
"list_features = []\n",
"for i, row in df_res_point.iterrows():\n",
- " feature = Feature(geometry = row[\"geometry\"],\n",
- " id = row[\"hex_id\"],\n",
- " properties = {\"resolution\": int(row[\"res\"])})\n",
+ " feature = Feature(\n",
+ " geometry=row[\"geometry\"], id=row[\"hex_id\"], properties={\"resolution\": int(row[\"res\"])}\n",
+ " )\n",
" list_features.append(feature)\n",
"\n",
"feat_collection = FeatureCollection(list_features)\n",
@@ -733,19 +704,17 @@
"\n",
"\n",
"GeoJson(\n",
- " geojson_result,\n",
- " style_function = lambda feature: {\n",
- " 'fillColor': None,\n",
- " 'color': (\"green\"\n",
- " if feature['properties']['resolution'] % 2 == 0\n",
- " else \"red\"),\n",
- " 'weight': 2,\n",
- " 'fillOpacity': 0.05\n",
- " },\n",
- " name = \"Example\"\n",
- " ).add_to(map_example)\n",
+ " geojson_result,\n",
+ " style_function=lambda feature: {\n",
+ " \"fillColor\": None,\n",
+ " \"color\": (\"green\" if feature[\"properties\"][\"resolution\"] % 2 == 0 else \"red\"),\n",
+ " \"weight\": 2,\n",
+ " \"fillOpacity\": 0.05,\n",
+ " },\n",
+ " name=\"Example\",\n",
+ ").add_to(map_example)\n",
"\n",
- "map_example.save('maps/1_resolutions.html')\n",
+ "map_example.save(\"maps/1_resolutions.html\")\n",
"map_example"
]
},
@@ -777,25 +746,19 @@
],
"source": [
"res_parent = 9\n",
- "h3_cell_parent = h3.geo_to_h3(lat = lat_centr_point,\n",
- " lng = lon_centr_point,\n",
- " resolution = res_parent\n",
- " )\n",
- "h3_cells_children = list(h3.h3_to_children(h = h3_cell_parent))\n",
- "assert(len(h3_cells_children) == math.pow(7, 1))\n",
+ "h3_cell_parent = h3.geo_to_h3(lat=lat_centr_point, lng=lon_centr_point, resolution=res_parent)\n",
+ "h3_cells_children = list(h3.h3_to_children(h=h3_cell_parent))\n",
+ "assert len(h3_cells_children) == math.pow(7, 1)\n",
"# ------\n",
- "h3_cells_grandchildren = list(h3.h3_to_children(h = h3_cell_parent, \n",
- " res = res_parent + 2))\n",
- "assert(len(h3_cells_grandchildren) == math.pow(7, 2))\n",
+ "h3_cells_grandchildren = list(h3.h3_to_children(h=h3_cell_parent, res=res_parent + 2))\n",
+ "assert len(h3_cells_grandchildren) == math.pow(7, 2)\n",
"# ------\n",
- "h3_cells_2xgrandchildren = list(h3.h3_to_children(h = h3_cell_parent, \n",
- " res = res_parent + 3))\n",
- "assert(len(h3_cells_2xgrandchildren) == math.pow(7, 3))\n",
+ "h3_cells_2xgrandchildren = list(h3.h3_to_children(h=h3_cell_parent, res=res_parent + 3))\n",
+ "assert len(h3_cells_2xgrandchildren) == math.pow(7, 3)\n",
"\n",
"# ------\n",
- "h3_cells_3xgrandchildren = list(h3.h3_to_children(h = h3_cell_parent, \n",
- " res = res_parent + 4))\n",
- "assert(len(h3_cells_3xgrandchildren) == math.pow(7, 4))\n",
+ "h3_cells_3xgrandchildren = list(h3.h3_to_children(h=h3_cell_parent, res=res_parent + 4))\n",
+ "assert len(h3_cells_3xgrandchildren) == math.pow(7, 4)\n",
"# ------\n",
"\n",
"msg_ = \"\"\"Parent cell: {} has :\n",
@@ -803,10 +766,15 @@
" {} grandchildren,\n",
" {} grandgrandchildren, \n",
" {} grandgrandgrandchildren\"\"\"\n",
- "print(msg_.format(h3_cell_parent, len(h3_cells_children),\n",
- " len(h3_cells_grandchildren), \n",
- " len(h3_cells_2xgrandchildren),\n",
- " len(h3_cells_3xgrandchildren)))"
+ "print(\n",
+ " msg_.format(\n",
+ " h3_cell_parent,\n",
+ " len(h3_cells_children),\n",
+ " len(h3_cells_grandchildren),\n",
+ " len(h3_cells_2xgrandchildren),\n",
+ " len(h3_cells_3xgrandchildren),\n",
+ " )\n",
+ ")"
]
},
{
@@ -816,56 +784,59 @@
"outputs": [],
"source": [
"def plot_parent_and_descendents(h3_cell_parent, h3_cells_children, ax=None):\n",
- " \n",
" list_distances_to_center = []\n",
- " \n",
+ "\n",
" if ax is None:\n",
- " fig, ax = plt.subplots(1, 1, figsize = (5, 5))\n",
- " \n",
+ " fig, ax = plt.subplots(1, 1, figsize=(5, 5))\n",
+ "\n",
" boundary_parent_coords = h3.h3_to_geo_boundary(h=h3_cell_parent, geo_json=True)\n",
" boundary_parent = geometry.Polygon(boundary_parent_coords)\n",
" # print(boundary_parent.wkt, \"\\n\")\n",
" res_parent = h3.h3_get_resolution(h3_cell_parent)\n",
- " \n",
+ "\n",
" # get the central descendent at the resolution of h3_cells_children\n",
" res_children = h3.h3_get_resolution(h3_cells_children[0])\n",
- " centerhex = h3.h3_to_center_child(h = h3_cell_parent, res = res_children)\n",
+ " centerhex = h3.h3_to_center_child(h=h3_cell_parent, res=res_children)\n",
"\n",
" # get the boundary of the multipolygon of the H3 cells union\n",
" boundary_children_union_coords = h3.h3_set_to_multi_polygon(\n",
- " hexes = h3_cells_children,\n",
- " geo_json = True)[0][0]\n",
+ " hexes=h3_cells_children, geo_json=True\n",
+ " )[0][0]\n",
" # close the linestring\n",
" boundary_children_union_coords.append(boundary_children_union_coords[0])\n",
" boundary_children_union = geometry.Polygon(boundary_children_union_coords)\n",
" # print(boundary_children_union.wkt, \"\\n\")\n",
- " \n",
+ "\n",
" # compute the overlapping geometry\n",
" # (the intersection of the boundary_parent with boundary_children_union):\n",
" overlap_geom = boundary_parent.intersection(boundary_children_union)\n",
- " print(\"overlap approx: {}\".format(round(overlap_geom.area / boundary_parent.area, 4))) \n",
+ " print(\"overlap approx: {}\".format(round(overlap_geom.area / boundary_parent.area, 4)))\n",
"\n",
" # plot\n",
" dict_adjust_textpos = {7: 0.0003, 8: 0.0001, 9: 0.00005, 10: 0.00002}\n",
- " \n",
+ "\n",
" for child in h3_cells_children:\n",
- " boundary_child_coords = h3.h3_to_geo_boundary(h = child, geo_json = True)\n",
+ " boundary_child_coords = h3.h3_to_geo_boundary(h=child, geo_json=True)\n",
" boundary_child = geometry.Polygon(boundary_child_coords)\n",
- " ax.plot(*boundary_child.exterior.coords.xy, color = \"grey\", linestyle=\"--\")\n",
- " \n",
- " dist_to_centerhex = h3.h3_distance(h1 = centerhex, h2 = child)\n",
+ " ax.plot(*boundary_child.exterior.coords.xy, color=\"grey\", linestyle=\"--\")\n",
+ "\n",
+ " dist_to_centerhex = h3.h3_distance(h1=centerhex, h2=child)\n",
" list_distances_to_center.append(dist_to_centerhex)\n",
- " \n",
+ "\n",
" if res_children <= res_parent + 3:\n",
" # add text\n",
- " ax.text(x = boundary_child.centroid.x - dict_adjust_textpos[res_parent],\n",
- " y = boundary_child.centroid.y - dict_adjust_textpos[res_parent],\n",
- " s = str(dist_to_centerhex),\n",
- " fontsize = 12, color = \"black\", weight = \"bold\")\n",
- " \n",
- " ax.plot(*boundary_children_union.exterior.coords.xy, color = \"blue\")\n",
- " ax.plot(*boundary_parent.exterior.coords.xy, color = \"red\", linewidth=2)\n",
- " \n",
+ " ax.text(\n",
+ " x=boundary_child.centroid.x - dict_adjust_textpos[res_parent],\n",
+ " y=boundary_child.centroid.y - dict_adjust_textpos[res_parent],\n",
+ " s=str(dist_to_centerhex),\n",
+ " fontsize=12,\n",
+ " color=\"black\",\n",
+ " weight=\"bold\",\n",
+ " )\n",
+ "\n",
+ " ax.plot(*boundary_children_union.exterior.coords.xy, color=\"blue\")\n",
+ " ax.plot(*boundary_parent.exterior.coords.xy, color=\"red\", linewidth=2)\n",
+ "\n",
" return list_distances_to_center"
]
},
@@ -898,19 +869,19 @@
}
],
"source": [
- "fig, ax = plt.subplots(2, 2, figsize = (20, 20))\n",
- "list_distances_to_center_dc = plot_parent_and_descendents(h3_cell_parent, \n",
- " h3_cells_children, \n",
- " ax = ax[0][0])\n",
- "list_distances_to_center_gc = plot_parent_and_descendents(h3_cell_parent,\n",
- " h3_cells_grandchildren,\n",
- " ax = ax[0][1])\n",
- "list_distances_to_center_2xgc = plot_parent_and_descendents(h3_cell_parent, \n",
- " h3_cells_2xgrandchildren, \n",
- " ax = ax[1][0])\n",
- "list_distances_to_center_3xgc = plot_parent_and_descendents(h3_cell_parent,\n",
- " h3_cells_3xgrandchildren,\n",
- " ax = ax[1][1])\n",
+ "fig, ax = plt.subplots(2, 2, figsize=(20, 20))\n",
+ "list_distances_to_center_dc = plot_parent_and_descendents(\n",
+ " h3_cell_parent, h3_cells_children, ax=ax[0][0]\n",
+ ")\n",
+ "list_distances_to_center_gc = plot_parent_and_descendents(\n",
+ " h3_cell_parent, h3_cells_grandchildren, ax=ax[0][1]\n",
+ ")\n",
+ "list_distances_to_center_2xgc = plot_parent_and_descendents(\n",
+ " h3_cell_parent, h3_cells_2xgrandchildren, ax=ax[1][0]\n",
+ ")\n",
+ "list_distances_to_center_3xgc = plot_parent_and_descendents(\n",
+ " h3_cell_parent, h3_cells_3xgrandchildren, ax=ax[1][1]\n",
+ ")\n",
"\n",
"\n",
"ax[0][0].set_title(\"Direct children (res 10)\")\n",
@@ -983,18 +954,13 @@
"metadata": {},
"outputs": [],
"source": [
- "def explore_ij_coords(lat_point, lon_point, num_rings = 3, ax = None):\n",
- "\n",
+ "def explore_ij_coords(lat_point, lon_point, num_rings=3, ax=None):\n",
" # an example at resolution 9\n",
- " hex_id_ex = h3.geo_to_h3(lat = lat_point,\n",
- " lng = lon_point,\n",
- " resolution = 9\n",
- " )\n",
- " assert(h3.h3_get_resolution(hex_id_ex) == 9)\n",
+ " hex_id_ex = h3.geo_to_h3(lat=lat_point, lng=lon_point, resolution=9)\n",
+ " assert h3.h3_get_resolution(hex_id_ex) == 9\n",
"\n",
" # get its rings\n",
- " list_siblings = list(h3.hex_range_distances(h = hex_id_ex, \n",
- " K = num_rings))\n",
+ " list_siblings = list(h3.hex_range_distances(h=hex_id_ex, K=num_rings))\n",
"\n",
" dict_ij = {}\n",
" dict_color = {}\n",
@@ -1002,10 +968,9 @@
"\n",
" if ax is None:\n",
" figsize = (min(6 * num_rings, 15), min(6 * num_rings, 15))\n",
- " fig, ax = plt.subplots(1, 1, figsize = figsize)\n",
+ " fig, ax = plt.subplots(1, 1, figsize=figsize)\n",
"\n",
" for ring_level in range(len(list_siblings)):\n",
- "\n",
" if ring_level == 0:\n",
" fontcol = \"red\"\n",
" elif ring_level == 1:\n",
@@ -1017,52 +982,56 @@
"\n",
" if ring_level == 0:\n",
" # on ring 0 is only hex_id_ex\n",
- " geom_boundary_coords = h3.h3_to_geo_boundary(hex_id_ex,\n",
- " geo_json = True)\n",
+ " geom_boundary_coords = h3.h3_to_geo_boundary(hex_id_ex, geo_json=True)\n",
" geom_shp = geometry.Polygon(geom_boundary_coords)\n",
- " ax.plot(*geom_shp.exterior.xy, color = \"purple\")\n",
+ " ax.plot(*geom_shp.exterior.xy, color=\"purple\")\n",
"\n",
- " ij_ex = h3.experimental_h3_to_local_ij(origin = hex_id_ex,\n",
- " h = hex_id_ex)\n",
+ " ij_ex = h3.experimental_h3_to_local_ij(origin=hex_id_ex, h=hex_id_ex)\n",
" s = \" {} \\n \\n (0,0)\".format(ij_ex)\n",
"\n",
" dict_ij[hex_id_ex] = ij_ex\n",
" dict_color[hex_id_ex] = \"red\"\n",
- " dict_s[hex_id_ex] = s \n",
+ " dict_s[hex_id_ex] = s\n",
"\n",
- " ax.text(x = geom_shp.centroid.x - 0.0017,\n",
- " y = geom_shp.centroid.y - 0.0005,\n",
- " s = s,\n",
- " fontsize = 11, color = fontcol, weight = \"bold\")\n",
+ " ax.text(\n",
+ " x=geom_shp.centroid.x - 0.0017,\n",
+ " y=geom_shp.centroid.y - 0.0005,\n",
+ " s=s,\n",
+ " fontsize=11,\n",
+ " color=fontcol,\n",
+ " weight=\"bold\",\n",
+ " )\n",
" else:\n",
" # get the hex ids resident on ring_level\n",
" siblings_on_ring = list(list_siblings[ring_level])\n",
"\n",
" k = 1\n",
" for sibling_hex in sorted(siblings_on_ring):\n",
- " geom_boundary_coords = h3.h3_to_geo_boundary(sibling_hex,\n",
- " geo_json=True)\n",
+ " geom_boundary_coords = h3.h3_to_geo_boundary(sibling_hex, geo_json=True)\n",
" geom_shp = geometry.Polygon(geom_boundary_coords)\n",
- " ax.plot(*geom_shp.exterior.xy, color = \"purple\")\n",
+ " ax.plot(*geom_shp.exterior.xy, color=\"purple\")\n",
"\n",
- " ij = h3.experimental_h3_to_local_ij(origin = hex_id_ex,\n",
- " h = sibling_hex)\n",
+ " ij = h3.experimental_h3_to_local_ij(origin=hex_id_ex, h=sibling_hex)\n",
" ij_diff = (ij[0] - ij_ex[0], ij[1] - ij_ex[1])\n",
" s = \" {} \\n \\n {}\".format(ij, ij_diff)\n",
" k = k + 1\n",
"\n",
- " dict_ij[sibling_hex] = ij \n",
+ " dict_ij[sibling_hex] = ij\n",
" dict_color[sibling_hex] = fontcol\n",
" dict_s[sibling_hex] = s\n",
"\n",
- " ax.text(x = geom_shp.centroid.x - 0.0017,\n",
- " y = geom_shp.centroid.y - 0.0005,\n",
- " s = s,\n",
- " fontsize = 11, color = fontcol, weight = \"bold\")\n",
+ " ax.text(\n",
+ " x=geom_shp.centroid.x - 0.0017,\n",
+ " y=geom_shp.centroid.y - 0.0005,\n",
+ " s=s,\n",
+ " fontsize=11,\n",
+ " color=fontcol,\n",
+ " weight=\"bold\",\n",
+ " )\n",
"\n",
" ax.set_ylabel(\"Latitude\")\n",
" ax.set_xlabel(\"Longitude\")\n",
- " \n",
+ "\n",
" return dict_ij, dict_color, dict_s"
]
},
@@ -1085,8 +1054,9 @@
}
],
"source": [
- "dict_ij, dict_color, dict_s = explore_ij_coords(lat_point = lat_centr_point,\n",
- " lon_point = lon_centr_point)"
+ "dict_ij, dict_color, dict_s = explore_ij_coords(\n",
+ " lat_point=lat_centr_point, lon_point=lon_centr_point\n",
+ ")"
]
},
{
@@ -1188,10 +1158,10 @@
],
"source": [
"# get geometry for india\n",
- "indonesia_loc = GeolocateAddress(query='Indonesia')\n",
+ "indonesia_loc = GeolocateAddress(query=\"Indonesia\")\n",
"\n",
- "#generate gdf for india with polygon geometry\n",
- "gdf_indonesia = gpd.GeoDataFrame.from_features(indonesia_loc.polygon_json, crs='epsg:4326')\n",
+ "# generate gdf for india with polygon geometry\n",
+ "gdf_indonesia = gpd.GeoDataFrame.from_features(indonesia_loc.polygon_json, crs=\"epsg:4326\")\n",
"gdf_indonesia"
]
},
@@ -1267,17 +1237,17 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"def base_empty_map():\n",
" \"\"\"Prepares a folium map centered in a central GPS point of Toulouse\"\"\"\n",
- " m = Map(location = [-0.25 , 112.43],\n",
- " zoom_start = 9.5,\n",
- " tiles = \"cartodbpositron\",\n",
- " attr = '''© \n",
+ " m = Map(\n",
+ " location=[-0.25, 112.43],\n",
+ " zoom_start=9.5,\n",
+ " tiles=\"cartodbpositron\",\n",
+ " attr=\"\"\"© \n",
" OpenStreetMapcontributors ©\n",
" \n",
- " CartoDB'''\n",
- " )\n",
+ " CartoDB\"\"\",\n",
+ " )\n",
" return m"
]
},
@@ -1816,7 +1786,7 @@
}
],
"source": [
- "raw_data = gpd.read_file('../../datasets/processed/located_lg_data_point_v2.shp')\n",
+ "raw_data = gpd.read_file(\"../../datasets/processed/located_lg_data_point_v2.shp\")\n",
"raw_data"
]
},
@@ -1826,11 +1796,11 @@
"metadata": {},
"outputs": [],
"source": [
- "## add the \n",
- "long_list = [raw_data.iloc[i]['geometry'].x for i in range(0, len(raw_data))]\n",
- "lat_list = [raw_data.iloc[i]['geometry'].y for i in range(0, len(raw_data))]\n",
- "raw_data['Latitude']=lat_list\n",
- "raw_data['Longitude']=long_list"
+ "## add the\n",
+ "long_list = [raw_data.iloc[i][\"geometry\"].x for i in range(0, len(raw_data))]\n",
+ "lat_list = [raw_data.iloc[i][\"geometry\"].y for i in range(0, len(raw_data))]\n",
+ "raw_data[\"Latitude\"] = lat_list\n",
+ "raw_data[\"Longitude\"] = long_list"
]
},
{
@@ -1859,9 +1829,9 @@
"mc = MarkerCluster()\n",
"\n",
"for i, row in raw_data.iterrows():\n",
- " mk = Marker(location = [row[\"Latitude\"], row[\"Longitude\"]])\n",
+ " mk = Marker(location=[row[\"Latitude\"], row[\"Longitude\"]])\n",
" mk.add_to(mc)\n",
- " \n",
+ "\n",
"mc.add_to(m)\n",
"m"
]
@@ -1966,8 +1936,10 @@
}
],
"source": [
- "gdf_raw_cpy = raw_data.reset_index(inplace = False, drop = False)\n",
- "df = gdf_raw_cpy.groupby(by=['Longitude', 'Latitude']).agg({'Material':'first', 'Volume':sum, 'Country': 'first'})\n",
+ "gdf_raw_cpy = raw_data.reset_index(inplace=False, drop=False)\n",
+ "df = gdf_raw_cpy.groupby(by=[\"Longitude\", \"Latitude\"]).agg(\n",
+ " {\"Material\": \"first\", \"Volume\": sum, \"Country\": \"first\"}\n",
+ ")\n",
"df.reset_index(inplace=True, drop=False)\n",
"df.head()"
]
@@ -2002,18 +1974,11 @@
" msg_ = \"At resolution {} --> H3 cell id : {} and its geometry: {} \"\n",
" print(msg_.format(res, col_hex_id, col_geom))\n",
" df[col_hex_id] = df.apply(\n",
- " lambda row: h3.geo_to_h3(\n",
- " lat = row[\"Latitude\"],\n",
- " lng = row[\"Longitude\"],\n",
- " resolution = res),\n",
- " axis = 1)\n",
+ " lambda row: h3.geo_to_h3(lat=row[\"Latitude\"], lng=row[\"Longitude\"], resolution=res), axis=1\n",
+ " )\n",
" # use h3.h3_to_geo_boundary to obtain the geometries of these hexagons\n",
" df[col_geom] = df[col_hex_id].apply(\n",
- " lambda x: {\"type\": \"Polygon\",\n",
- " \"coordinates\":\n",
- " [h3.h3_to_geo_boundary(\n",
- " h=x, geo_json=True)]\n",
- " }\n",
+ " lambda x: {\"type\": \"Polygon\", \"coordinates\": [h3.h3_to_geo_boundary(h=x, geo_json=True)]}\n",
" )\n",
" df.head().T"
]
@@ -2414,7 +2379,7 @@
}
],
"source": [
- "ind_gdf = gpd.read_file('../../datasets/raw/ind_geo.json')\n",
+ "ind_gdf = gpd.read_file(\"../../datasets/raw/ind_geo.json\")\n",
"ind_gdf.head()"
]
},
@@ -2502,22 +2467,23 @@
"source": [
"## multipolygon to polygon\n",
"import geopandas as gpd\n",
- "from shapely.geometry.polygon import Polygon\n",
"from shapely.geometry.multipolygon import MultiPolygon\n",
+ "from shapely.geometry.polygon import Polygon\n",
+ "\n",
"\n",
"def explode(indf):\n",
- " #indf = gpd.GeoDataFrame.from_file(indata)\n",
+ " # indf = gpd.GeoDataFrame.from_file(indata)\n",
" outdf = gpd.GeoDataFrame(columns=indf.columns)\n",
" for idx, row in indf.iterrows():\n",
" if type(row.geometry) == Polygon:\n",
- " outdf = outdf.append(row,ignore_index=True)\n",
+ " outdf = outdf.append(row, ignore_index=True)\n",
" if type(row.geometry) == MultiPolygon:\n",
" multdf = gpd.GeoDataFrame(columns=indf.columns)\n",
" recs = len(row.geometry)\n",
- " multdf = multdf.append([row]*recs,ignore_index=True)\n",
+ " multdf = multdf.append([row] * recs, ignore_index=True)\n",
" for geom in range(recs):\n",
- " multdf.loc[geom,'geometry'] = row.geometry[geom]\n",
- " outdf = outdf.append(multdf,ignore_index=True)\n",
+ " multdf.loc[geom, \"geometry\"] = row.geometry[geom]\n",
+ " outdf = outdf.append(multdf, ignore_index=True)\n",
" return outdf"
]
},
@@ -2667,8 +2633,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#save polygon file as geojson\n",
- "ind_gdf_test.to_file('../../datasets/raw/ind_geo_test.json', driver='GeoJSON')"
+ "# save polygon file as geojson\n",
+ "ind_gdf_test.to_file(\"../../datasets/raw/ind_geo_test.json\", driver=\"GeoJSON\")"
]
},
{
@@ -2681,17 +2647,15 @@
" \"\"\"Loads a geojson files of polygon geometries and features,\n",
" swaps the latitude and longitude andstores geojson\"\"\"\n",
" gdf = gpd.read_file(filepath, driver=\"GeoJSON\")\n",
- " \n",
- " gdf[\"geom_geojson\"] = gdf[\"geometry\"].apply(\n",
- " lambda x: geometry.mapping(x))\n",
+ "\n",
+ " gdf[\"geom_geojson\"] = gdf[\"geometry\"].apply(lambda x: geometry.mapping(x))\n",
"\n",
" gdf[\"geom_swap\"] = gdf[\"geometry\"].map(\n",
- " lambda polygon: ops.transform(\n",
- " lambda x, y: (y, x), polygon))\n",
+ " lambda polygon: ops.transform(lambda x, y: (y, x), polygon)\n",
+ " )\n",
+ "\n",
+ " gdf[\"geom_swap_geojson\"] = gdf[\"geom_swap\"].apply(lambda x: geometry.mapping(x))\n",
"\n",
- " gdf[\"geom_swap_geojson\"] = gdf[\"geom_swap\"].apply(\n",
- " lambda x: geometry.mapping(x))\n",
- " \n",
" return gdf"
]
},
@@ -2870,7 +2834,7 @@
}
],
"source": [
- "ind_gdf_swap = load_and_prepare_districts(filepath = '../../datasets/raw/ind_geo_test.json')\n",
+ "ind_gdf_swap = load_and_prepare_districts(filepath=\"../../datasets/raw/ind_geo_test.json\")\n",
"ind_gdf_swap.head()"
]
},
@@ -2955,29 +2919,25 @@
"metadata": {},
"outputs": [],
"source": [
- "def fill_hexagons(geom_geojson, res, flag_swap = False, flag_return_df = False):\n",
+ "def fill_hexagons(geom_geojson, res, flag_swap=False, flag_return_df=False):\n",
" \"\"\"Fills a geometry given in geojson format with H3 hexagons at specified\n",
" resolution. The flag_reverse_geojson allows to specify whether the geometry\n",
" is lon/lat or swapped\"\"\"\n",
"\n",
- " set_hexagons = h3.polyfill(geojson = geom_geojson,\n",
- " res = res,\n",
- " geo_json_conformant = flag_swap)\n",
+ " set_hexagons = h3.polyfill(geojson=geom_geojson, res=res, geo_json_conformant=flag_swap)\n",
" list_hexagons_filling = list(set_hexagons)\n",
"\n",
" if flag_return_df is True:\n",
" # make dataframe\n",
" df_fill_hex = pd.DataFrame({\"hex_id\": list_hexagons_filling})\n",
" df_fill_hex[\"value\"] = 0\n",
- " df_fill_hex['geometry'] = df_fill_hex.hex_id.apply(\n",
- " lambda x:\n",
- " {\"type\": \"Polygon\",\n",
- " \"coordinates\": [\n",
- " h3.h3_to_geo_boundary(h=x,\n",
- " geo_json=True)\n",
- " ]\n",
- " })\n",
- " assert(df_fill_hex.shape[0] == len(list_hexagons_filling))\n",
+ " df_fill_hex[\"geometry\"] = df_fill_hex.hex_id.apply(\n",
+ " lambda x: {\n",
+ " \"type\": \"Polygon\",\n",
+ " \"coordinates\": [h3.h3_to_geo_boundary(h=x, geo_json=True)],\n",
+ " }\n",
+ " )\n",
+ " assert df_fill_hex.shape[0] == len(list_hexagons_filling)\n",
" return df_fill_hex\n",
" else:\n",
" return list_hexagons_filling"
@@ -3243,8 +3203,9 @@
}
],
"source": [
- "\n",
- "ind_gdf_swap[\"hex_fill_initial\"] = ind_gdf_swap[\"geom_swap_geojson\"].apply(lambda x: list(fill_hexagons(geom_geojson = x, res = 13)))\n",
+ "ind_gdf_swap[\"hex_fill_initial\"] = ind_gdf_swap[\"geom_swap_geojson\"].apply(\n",
+ " lambda x: list(fill_hexagons(geom_geojson=x, res=13))\n",
+ ")\n",
"ind_gdf_swap[\"num_hex_fill_initial\"] = ind_gdf_swap[\"hex_fill_initial\"].apply(len)\n",
"\n",
"total_num_hex_initial = ind_gdf_swap[\"num_hex_fill_initial\"].sum()\n",
@@ -3298,7 +3259,7 @@
"metadata": {},
"outputs": [],
"source": [
- "test_gdf = ind_gdf_swap[:15]\n"
+ "test_gdf = ind_gdf_swap[:15]"
]
},
{
@@ -3406,15 +3367,21 @@
"test_gdf[\"hex_fill_compact\"] = test_gdf[\"hex_fill_initial\"].apply(lambda x: list(h3.compact(x)))\n",
"test_gdf[\"num_hex_fill_compact\"] = test_gdf[\"hex_fill_compact\"].apply(len)\n",
"\n",
- "print(\"Reduced number of cells from {} to {} \\n\".format(\n",
- " test_gdf[\"num_hex_fill_initial\"].sum(),\n",
- " test_gdf[\"num_hex_fill_compact\"].sum()))\n",
+ "print(\n",
+ " \"Reduced number of cells from {} to {} \\n\".format(\n",
+ " test_gdf[\"num_hex_fill_initial\"].sum(), test_gdf[\"num_hex_fill_compact\"].sum()\n",
+ " )\n",
+ ")\n",
"\n",
"# count cells by index resolution after compacting\n",
"\n",
- "test_gdf[\"hex_resolutions\"] = test_gdf[\"hex_fill_compact\"].apply(lambda x: [h3.h3_get_resolution(hexid) for hexid in x])\n",
+ "test_gdf[\"hex_resolutions\"] = test_gdf[\"hex_fill_compact\"].apply(\n",
+ " lambda x: [h3.h3_get_resolution(hexid) for hexid in x]\n",
+ ")\n",
"test_gdf[\"hex_resolutions_counts\"] = test_gdf[\"hex_resolutions\"].apply(lambda x: Counter(x))\n",
- "test_gdf[[\"geometry\", \"num_hex_fill_initial\", \"num_hex_fill_compact\", \"hex_resolutions_counts\"]].head()"
+ "test_gdf[\n",
+ " [\"geometry\", \"num_hex_fill_initial\", \"num_hex_fill_compact\", \"hex_resolutions_counts\"]\n",
+ "].head()"
]
},
{
@@ -3423,7 +3390,6 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"# this column of empty lists is a placeholder, will be used further in this section\n",
"test_gdf[\"compacted_novoids\"] = [[] for _ in range(test_gdf.shape[0])]"
]
@@ -3434,50 +3400,51 @@
"metadata": {},
"outputs": [],
"source": [
- "def plot_basemap_region_fill(df_boundaries_zones, initial_map = None):\n",
- " \n",
+ "def plot_basemap_region_fill(df_boundaries_zones, initial_map=None):\n",
" \"\"\"On a folium map, add the boundaries of the geometries in geojson formatted\n",
- " column of df_boundaries_zones\"\"\"\n",
+ " column of df_boundaries_zones\"\"\"\n",
"\n",
" if initial_map is None:\n",
" initial_map = base_empty_map()\n",
"\n",
- " feature_group = folium.FeatureGroup(name='Boundaries')\n",
+ " feature_group = folium.FeatureGroup(name=\"Boundaries\")\n",
"\n",
" for i, row in df_boundaries_zones.iterrows():\n",
- " feature_sel = Feature(geometry = row[\"geom_geojson\"], id=str(i))\n",
+ " feature_sel = Feature(geometry=row[\"geom_geojson\"], id=str(i))\n",
" feat_collection_sel = FeatureCollection([feature_sel])\n",
" geojson_subzone = json.dumps(feat_collection_sel)\n",
"\n",
" GeoJson(\n",
- " geojson_subzone,\n",
- " style_function=lambda feature: {\n",
- " 'fillColor': None,\n",
- " 'color': 'blue',\n",
- " 'weight': 5,\n",
- " 'fillOpacity': 0\n",
- " }\n",
- " ).add_to(feature_group)\n",
+ " geojson_subzone,\n",
+ " style_function=lambda feature: {\n",
+ " \"fillColor\": None,\n",
+ " \"color\": \"blue\",\n",
+ " \"weight\": 5,\n",
+ " \"fillOpacity\": 0,\n",
+ " },\n",
+ " ).add_to(feature_group)\n",
"\n",
" feature_group.add_to(initial_map)\n",
" return initial_map\n",
"\n",
- "# ---------------------------------------------------------------------------\n",
"\n",
+ "# ---------------------------------------------------------------------------\n",
"\n",
- "def hexagons_dataframe_to_geojson(df_hex, hex_id_field,\n",
- " geometry_field, value_field,\n",
- " file_output = None):\n",
"\n",
+ "def hexagons_dataframe_to_geojson(\n",
+ " df_hex, hex_id_field, geometry_field, value_field, file_output=None\n",
+ "):\n",
" \"\"\"Produce the GeoJSON representation containing all geometries in a dataframe\n",
- " based on a column in geojson format (geometry_field)\"\"\"\n",
+ " based on a column in geojson format (geometry_field)\"\"\"\n",
"\n",
" list_features = []\n",
"\n",
" for i, row in df_hex.iterrows():\n",
- " feature = Feature(geometry = row[geometry_field],\n",
- " id = row[hex_id_field],\n",
- " properties = {\"value\": row[value_field]})\n",
+ " feature = Feature(\n",
+ " geometry=row[geometry_field],\n",
+ " id=row[hex_id_field],\n",
+ " properties={\"value\": row[value_field]},\n",
+ " )\n",
" list_features.append(feature)\n",
"\n",
" feat_collection = FeatureCollection(list_features)\n",
@@ -3491,31 +3458,31 @@
"\n",
" return geojson_result\n",
"\n",
+ "\n",
"# ---------------------------------------------------------------------------------\n",
"\n",
"\n",
- "def map_addlayer_filling(df_fill_hex, layer_name, map_initial, fillcolor = None):\n",
- " \"\"\" On a folium map (likely created with plot_basemap_region_fill),\n",
- " add a layer of hexagons that filled the geometry at given H3 resolution\n",
- " (df_fill_hex returned by fill_hexagons method)\"\"\"\n",
+ "def map_addlayer_filling(df_fill_hex, layer_name, map_initial, fillcolor=None):\n",
+ " \"\"\"On a folium map (likely created with plot_basemap_region_fill),\n",
+ " add a layer of hexagons that filled the geometry at given H3 resolution\n",
+ " (df_fill_hex returned by fill_hexagons method)\"\"\"\n",
"\n",
- " geojson_hx = hexagons_dataframe_to_geojson(df_fill_hex,\n",
- " hex_id_field = \"hex_id\",\n",
- " value_field = \"value\",\n",
- " geometry_field = \"geometry\")\n",
+ " geojson_hx = hexagons_dataframe_to_geojson(\n",
+ " df_fill_hex, hex_id_field=\"hex_id\", value_field=\"value\", geometry_field=\"geometry\"\n",
+ " )\n",
"\n",
" GeoJson(\n",
- " geojson_hx,\n",
- " style_function=lambda feature: {\n",
- " 'fillColor': fillcolor,\n",
- " 'color': 'red',\n",
- " 'weight': 2,\n",
- " 'fillOpacity': 0.1\n",
- " },\n",
- " name = layer_name\n",
- " ).add_to(map_initial)\n",
+ " geojson_hx,\n",
+ " style_function=lambda feature: {\n",
+ " \"fillColor\": fillcolor,\n",
+ " \"color\": \"red\",\n",
+ " \"weight\": 2,\n",
+ " \"fillOpacity\": 0.1,\n",
+ " },\n",
+ " name=layer_name,\n",
+ " ).add_to(map_initial)\n",
"\n",
- " return map_initial\n"
+ " return map_initial"
]
},
{
@@ -3524,37 +3491,36 @@
"metadata": {},
"outputs": [],
"source": [
- "def visualize_filled_compact(gdf,fillcolor=None):\n",
+ "def visualize_filled_compact(gdf, fillcolor=None):\n",
" overall_map = base_empty_map()\n",
- " map_ = plot_basemap_region_fill(gdf, initial_map =overall_map)\n",
- " \n",
+ " map_ = plot_basemap_region_fill(gdf, initial_map=overall_map)\n",
+ "\n",
" for i, row in gdf.iterrows():\n",
- " \n",
- " if len(row['compacted_novoids']) > 0:\n",
+ " if len(row[\"compacted_novoids\"]) > 0:\n",
" list_hexagons_filling_compact = row[\"compacted_novoids\"]\n",
" else:\n",
" list_hexagons_filling_compact = []\n",
- " \n",
+ "\n",
" list_hexagons_filling_compact.extend(row[\"hex_fill_compact\"])\n",
" list_hexagons_filling_compact = list(set(list_hexagons_filling_compact))\n",
- " \n",
+ "\n",
" # make dataframes\n",
" df_fill_compact = pd.DataFrame({\"hex_id\": list_hexagons_filling_compact})\n",
" df_fill_compact[\"value\"] = 0\n",
- " df_fill_compact['geometry'] = df_fill_compact.hex_id.apply(\n",
- " lambda x: \n",
- " {\"type\": \"Polygon\",\n",
- " \"coordinates\": [\n",
- " h3.h3_to_geo_boundary(h=x,\n",
- " geo_json=True)\n",
- " ]\n",
- " })\n",
+ " df_fill_compact[\"geometry\"] = df_fill_compact.hex_id.apply(\n",
+ " lambda x: {\n",
+ " \"type\": \"Polygon\",\n",
+ " \"coordinates\": [h3.h3_to_geo_boundary(h=x, geo_json=True)],\n",
+ " }\n",
+ " )\n",
"\n",
- " map_fill_compact = map_addlayer_filling(df_fill_hex = df_fill_compact, \n",
- " layer_name = 'test_ind',\n",
- " map_initial = map_,\n",
- " fillcolor = fillcolor)\n",
- " folium.map.LayerControl('bottomright', collapsed=True).add_to(map_fill_compact)\n",
+ " map_fill_compact = map_addlayer_filling(\n",
+ " df_fill_hex=df_fill_compact,\n",
+ " layer_name=\"test_ind\",\n",
+ " map_initial=map_,\n",
+ " fillcolor=fillcolor,\n",
+ " )\n",
+ " folium.map.LayerControl(\"bottomright\", collapsed=True).add_to(map_fill_compact)\n",
"\n",
" return map_fill_compact"
]
@@ -3592,7 +3558,7 @@
}
],
"source": [
- "visualize_filled_compact(gdf = test_gdf)"
+ "visualize_filled_compact(gdf=test_gdf)"
]
},
{
@@ -3613,29 +3579,24 @@
"outputs": [],
"source": [
"def generate_filled_compact(gdf):\n",
- " \n",
" for i, row in gdf.iterrows():\n",
- " \n",
- " if len(row['compacted_novoids']) > 0:\n",
+ " if len(row[\"compacted_novoids\"]) > 0:\n",
" list_hexagons_filling_compact = row[\"compacted_novoids\"]\n",
" else:\n",
" list_hexagons_filling_compact = []\n",
- " \n",
+ "\n",
" list_hexagons_filling_compact.extend(row[\"hex_fill_compact\"])\n",
" list_hexagons_filling_compact = list(set(list_hexagons_filling_compact))\n",
- " \n",
+ "\n",
" # make dataframes\n",
" df_fill_compact = pd.DataFrame({\"hex_id\": list_hexagons_filling_compact})\n",
" df_fill_compact[\"value\"] = 0\n",
- " df_fill_compact['geometry'] = df_fill_compact.hex_id.apply(\n",
- " lambda x: \n",
- " {\"type\": \"Polygon\",\n",
- " \"coordinates\": [\n",
- " h3.h3_to_geo_boundary(h=x,\n",
- " geo_json=True)\n",
- " ]\n",
- " })\n",
- "\n",
+ " df_fill_compact[\"geometry\"] = df_fill_compact.hex_id.apply(\n",
+ " lambda x: {\n",
+ " \"type\": \"Polygon\",\n",
+ " \"coordinates\": [h3.h3_to_geo_boundary(h=x, geo_json=True)],\n",
+ " }\n",
+ " )\n",
"\n",
" return df_fill_compact"
]
@@ -3731,7 +3692,7 @@
"metadata": {},
"outputs": [],
"source": [
- "to_save_gdf.to_json('test_hex_export.json')"
+ "to_save_gdf.to_json(\"test_hex_export.json\")"
]
},
{
@@ -3760,9 +3721,9 @@
"metadata": {},
"outputs": [],
"source": [
- "from shapely.geometry import mapping, shape\n",
- "import json \n",
+ "import json\n",
"\n",
+ "from shapely.geometry import shape\n",
"\n",
"to_save_gdf[\"the_geom\"] = to_save_gdf[\"geometry\"].apply(lambda x: shape(x))"
]
@@ -4028,7 +3989,7 @@
}
],
"source": [
- "to_save_gdf.set_geometry('the_geom')"
+ "to_save_gdf.set_geometry(\"the_geom\")"
]
},
{
@@ -4204,7 +4165,7 @@
}
],
"source": [
- "test_2 = gpd.GeoDataFrame(gdf_[['hex_id', 'the_geom']])\n",
+ "test_2 = gpd.GeoDataFrame(gdf_[[\"hex_id\", \"the_geom\"]])\n",
"test_2.head()"
]
},
@@ -4322,7 +4283,7 @@
}
],
"source": [
- "test_2.set_geometry('the_geom')\n"
+ "test_2.set_geometry(\"the_geom\")"
]
},
{
@@ -4331,7 +4292,7 @@
"metadata": {},
"outputs": [],
"source": [
- "test_2['geometry']=test_2['the_geom']"
+ "test_2[\"geometry\"] = test_2[\"the_geom\"]"
]
},
{
@@ -4363,7 +4324,7 @@
}
],
"source": [
- "test_2[['hex_id', 'geometry']].plot()"
+ "test_2[[\"hex_id\", \"geometry\"]].plot()"
]
},
{
@@ -4478,7 +4439,7 @@
}
],
"source": [
- "cv = gpd.read_file('../../datasets/raw/cvalenciana.shp')\n",
+ "cv = gpd.read_file(\"../../datasets/raw/cvalenciana.shp\")\n",
"cv.head()"
]
},
@@ -4569,7 +4530,7 @@
}
],
"source": [
- "cv_test = gpd.read_file('../../datasets/raw/cvalenciana_test.shp')\n",
+ "cv_test = gpd.read_file(\"../../datasets/raw/cvalenciana_test.shp\")\n",
"cv_test.head()"
]
},
@@ -4579,9 +4540,8 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"# Create an empty dataframe to write data into\n",
- "h3_df = pd.DataFrame([],columns=['country','city','h3_id','h3_geo_boundary','h3_centroid'])"
+ "h3_df = pd.DataFrame([], columns=[\"country\", \"city\", \"h3_id\", \"h3_geo_boundary\", \"h3_centroid\"])"
]
},
{
@@ -4590,7 +4550,6 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"import shapely"
]
},
@@ -4600,7 +4559,7 @@
"metadata": {},
"outputs": [],
"source": [
- "district_polygon = list(cv.iloc[0]['geometry']) "
+ "district_polygon = list(cv.iloc[0][\"geometry\"])"
]
},
{
@@ -4609,7 +4568,7 @@
"metadata": {},
"outputs": [],
"source": [
- "polygon_geojson = gpd.GeoSeries(cv.iloc[0]['geometry']).__geo_interface__"
+ "polygon_geojson = gpd.GeoSeries(cv.iloc[0][\"geometry\"]).__geo_interface__"
]
},
{
@@ -4627,7 +4586,7 @@
"metadata": {},
"outputs": [],
"source": [
- "poly_geojson = polygon_geojson['features'][0]['geometry']"
+ "poly_geojson = polygon_geojson[\"features\"][0][\"geometry\"]"
]
},
{
@@ -5666,19 +5625,13 @@
"outputs": [],
"source": [
"# Create an empty dataframe to write data into\n",
- "h3_df = pd.DataFrame([],columns=['h3_id','h3_geo_boundary','h3_centroid'])\n",
+ "h3_df = pd.DataFrame([], columns=[\"h3_id\", \"h3_geo_boundary\", \"h3_centroid\"])\n",
"for h3_hex in h3_hexes:\n",
- " h3_geo_boundary = shapely.geometry.Polygon(\n",
- " h3.h3_to_geo_boundary(h3_hex,geo_json=True)\n",
- " )\n",
- " \n",
+ " h3_geo_boundary = shapely.geometry.Polygon(h3.h3_to_geo_boundary(h3_hex, geo_json=True))\n",
+ "\n",
" h3_centroid = h3.h3_to_geo(h3_hex)\n",
" # Append results to dataframe\n",
- " h3_df.loc[len(h3_df)]=[\n",
- " h3_hex,\n",
- " h3_geo_boundary,\n",
- " h3_centroid\n",
- " ]"
+ " h3_df.loc[len(h3_df)] = [h3_hex, h3_geo_boundary, h3_centroid]"
]
},
{
@@ -5687,7 +5640,7 @@
"metadata": {},
"outputs": [],
"source": [
- "h3_df.to_csv('../../datasets/processed/hex3_test_v4_8res.csv', index=False)"
+ "h3_df.to_csv(\"../../datasets/processed/hex3_test_v4_8res.csv\", index=False)"
]
},
{
@@ -5706,19 +5659,13 @@
"outputs": [],
"source": [
"# Create an empty dataframe to write data into\n",
- "h3_df_test = pd.DataFrame([],columns=['h3_id','h3_geo_boundary','h3_centroid'])\n",
+ "h3_df_test = pd.DataFrame([], columns=[\"h3_id\", \"h3_geo_boundary\", \"h3_centroid\"])\n",
"for h3_hex in test_h3:\n",
- " h3_geo_boundary = shapely.geometry.Polygon(\n",
- " h3.h3_to_geo_boundary(h3_hex,geo_json=True)\n",
- " )\n",
- " \n",
+ " h3_geo_boundary = shapely.geometry.Polygon(h3.h3_to_geo_boundary(h3_hex, geo_json=True))\n",
+ "\n",
" h3_centroid = h3.h3_to_geo(h3_hex)\n",
" # Append results to dataframe\n",
- " h3_df_test.loc[len(h3_df)]=[\n",
- " h3_hex,\n",
- " h3_geo_boundary,\n",
- " h3_centroid\n",
- " ]"
+ " h3_df_test.loc[len(h3_df)] = [h3_hex, h3_geo_boundary, h3_centroid]"
]
},
{
@@ -5727,7 +5674,7 @@
"metadata": {},
"outputs": [],
"source": [
- "h3_df_test.to_csv('../../datasets/processed/hex3_test_v2.csv', index=False)"
+ "h3_df_test.to_csv(\"../../datasets/processed/hex3_test_v2.csv\", index=False)"
]
},
{
@@ -5736,34 +5683,26 @@
"metadata": {},
"outputs": [],
"source": [
- "# Iterate over every row of the geo dataframe \n",
+ "# Iterate over every row of the geo dataframe\n",
"for i, row in cv.iterrows():\n",
" # Parse out info from columns of row\n",
" country = row.NAME_0\n",
" city = row.NAME_1\n",
" district_multipolygon = row.geometry\n",
" # Convert multi-polygon into list of polygons\n",
- " district_polygon = list(district_multipolygon) \n",
+ " district_polygon = list(district_multipolygon)\n",
" for polygon in district_polygon:\n",
" # Convert Polygon to GeoJSON dictionary\n",
" poly_geojson = gpd.GeoSeries([polygon]).__geo_interface__\n",
" # Parse out geometry key from GeoJSON dictionary\n",
- " poly_geojson = poly_geojson['features'][0]['geometry'] \n",
+ " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n",
" # Fill the dictionary with Resolution 10 H3 Hexagons\n",
- " h3_hexes = h3.polyfill_geojson(poly_geojson, 10) \n",
+ " h3_hexes = h3.polyfill_geojson(poly_geojson, 10)\n",
" for h3_hex in h3_hexes:\n",
- " h3_geo_boundary = shapely.geometry.Polygon(\n",
- " h3.h3_to_geo_boundary(h3_hex,geo_json=True)\n",
- " )\n",
+ " h3_geo_boundary = shapely.geometry.Polygon(h3.h3_to_geo_boundary(h3_hex, geo_json=True))\n",
" h3_centroid = h3.h3_to_geo(h3_hex)\n",
" # Append results to dataframe\n",
- " h3_df.loc[len(h3_df)]=[\n",
- " country,\n",
- " city,\n",
- " h3_hex,\n",
- " h3_geo_boundary,\n",
- " h3_centroid\n",
- " ]"
+ " h3_df.loc[len(h3_df)] = [country, city, h3_hex, h3_geo_boundary, h3_centroid]"
]
},
{
diff --git a/data/notebooks/Lab/0_4_1_H3_calculations.ipynb b/data/notebooks/Lab/0_4_1_H3_calculations.ipynb
index 97f80e973..9c7c29e6c 100644
--- a/data/notebooks/Lab/0_4_1_H3_calculations.ipynb
+++ b/data/notebooks/Lab/0_4_1_H3_calculations.ipynb
@@ -375,31 +375,27 @@
}
],
"source": [
- "# import libraries\n",
- "import h3\n",
- "\n",
- "import pandas as pd\n",
- "import geopandas as gpd\n",
"import json\n",
"import time\n",
"\n",
- "from shapely.geometry import shape, Polygon, Point\n",
- "\n",
+ "import geopandas as gpd\n",
"\n",
+ "# import libraries\n",
+ "import h3\n",
+ "import pandas as pd\n",
+ "import pandas_bokeh\n",
"from rasterstats import gen_zonal_stats\n",
+ "from shapely.geometry import Point, shape\n",
"\n",
- "\n",
- "import pandas_bokeh\n",
"pandas_bokeh.output_notebook()\n",
"\n",
+ "\n",
"import numpy as np\n",
"import scipy.special\n",
- "\n",
"from bokeh.layouts import gridplot\n",
- "from bokeh.plotting import figure, output_file, show\n",
"from bokeh.models import ColumnDataSource\n",
- "from datetime import datetime\n",
- "from bokeh.palettes import Spectral10"
+ "from bokeh.palettes import Spectral10\n",
+ "from bokeh.plotting import figure, output_file, show"
]
},
{
@@ -429,25 +425,27 @@
"outputs": [],
"source": [
"# define function to covert geoms to h3\n",
+ "\n",
+ "\n",
"def generate_h3_features(geometry, res):\n",
" \"\"\"\n",
" Generate h3 for geometry\n",
- " \n",
+ "\n",
" Input\n",
" ------\n",
" geometry: shapely.polygon or shapely.multipolygon\n",
- " \n",
+ "\n",
" Output\n",
" ------\n",
" gdf with H3_hexes\n",
" \"\"\"\n",
" # Create an empty dataframe to write data into\n",
- " h3_df = pd.DataFrame([],columns=['h3_id'])\n",
- " if geometry.geom_type == 'MultiPolygon':\n",
+ " pd.DataFrame([], columns=[\"h3_id\"])\n",
+ " if geometry.geom_type == \"MultiPolygon\":\n",
" district_polygon = list(geometry)\n",
" for polygon in district_polygon:\n",
" poly_geojson = gpd.GeoSeries([polygon]).__geo_interface__\n",
- " poly_geojson = poly_geojson['features'][0]['geometry'] \n",
+ " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n",
" h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n",
" for h3_hex in h3_hexes:\n",
" coords = h3.h3_set_to_multi_polygon([h3_hex], geo_json=True)\n",
@@ -456,9 +454,9 @@
" \"properties\": {\"hexid\": h3_hex},\n",
" \"geometry\": {\"type\": \"Polygon\", \"coordinates\": coords[0]},\n",
" }\n",
- " elif geometry.geom_type == 'Polygon':\n",
+ " elif geometry.geom_type == \"Polygon\":\n",
" poly_geojson = gpd.GeoSeries(geometry).__geo_interface__\n",
- " poly_geojson = poly_geojson['features'][0]['geometry']\n",
+ " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n",
" h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n",
" for h3_hex in h3_hexes:\n",
" coords = h3.h3_set_to_multi_polygon([h3_hex], geo_json=True)\n",
@@ -468,14 +466,13 @@
" \"geometry\": {\"type\": \"Polygon\", \"coordinates\": coords[0]},\n",
" }\n",
" else:\n",
- " print('Shape is not a polygon or multypolygon.')\n",
- " \n",
+ " print(\"Shape is not a polygon or multypolygon.\")\n",
+ "\n",
"\n",
- " \n",
"def get_h3_array(geom, raster_path, res, stats, prefix):\n",
" \"\"\"\n",
" Function that trasnlate a raster into h3\n",
- " \n",
+ "\n",
" Input\n",
" ------\n",
" geom - geometry used for filling with h3\n",
@@ -483,13 +480,13 @@
" res - resolution of the h3 level\n",
" stats - stats used in the summary stats\n",
" prefix - for output in the summary stats column\n",
- " \n",
+ "\n",
" Output\n",
" ------\n",
" array - temporal array with hex id and stats info\n",
" \"\"\"\n",
" h3_features = generate_h3_features(geom, res)\n",
- " \n",
+ "\n",
" summ_stats_h3_r5 = gen_zonal_stats(\n",
" h3_features,\n",
" raster_path,\n",
@@ -497,33 +494,40 @@
" prefix=prefix,\n",
" percent_cover_weighting=True,\n",
" geojson_out=True,\n",
- " all_touched=True\n",
- " )\n",
- " \n",
+ " all_touched=True,\n",
+ " )\n",
+ "\n",
" _array = []\n",
" for feature in summ_stats_h3_r5:\n",
- " if feature['properties'][f'{prefix}{stats}'] !=0:\n",
+ " if feature[\"properties\"][f\"{prefix}{stats}\"] != 0:\n",
" element = {\n",
- " 'sumStats':feature['properties'][f'{prefix}{stats}'],\n",
- " 'hexId':feature['properties']['hexid'], \n",
+ " \"sumStats\": feature[\"properties\"][f\"{prefix}{stats}\"],\n",
+ " \"hexId\": feature[\"properties\"][\"hexid\"],\n",
" }\n",
" _array.append(element)\n",
- " return _array \n",
+ " return _array\n",
"\n",
"\n",
"def make_plot(title, hist, edges, x, pdf, cdf):\n",
- " p = figure(title=title, tools='', background_fill_color=\"#fafafa\")\n",
- " p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],\n",
- " fill_color=\"navy\", line_color=\"white\", alpha=0.5)\n",
+ " p = figure(title=title, tools=\"\", background_fill_color=\"#fafafa\")\n",
+ " p.quad(\n",
+ " top=hist,\n",
+ " bottom=0,\n",
+ " left=edges[:-1],\n",
+ " right=edges[1:],\n",
+ " fill_color=\"navy\",\n",
+ " line_color=\"white\",\n",
+ " alpha=0.5,\n",
+ " )\n",
" p.line(x, pdf, line_color=\"#ff8888\", line_width=4, alpha=0.7, legend_label=\"PDF\")\n",
" p.line(x, cdf, line_color=\"orange\", line_width=2, alpha=0.7, legend_label=\"CDF\")\n",
"\n",
" p.y_range.start = 0\n",
" p.legend.location = \"center_right\"\n",
" p.legend.background_fill_color = \"#fefefe\"\n",
- " p.xaxis.axis_label = 'x'\n",
- " p.yaxis.axis_label = 'Pr(x)'\n",
- " p.grid.grid_line_color=\"white\"\n",
+ " p.xaxis.axis_label = \"x\"\n",
+ " p.yaxis.axis_label = \"Pr(x)\"\n",
+ " p.grid.grid_line_color = \"white\"\n",
" return p"
]
},
@@ -609,8 +613,8 @@
}
],
"source": [
- "#import indonesia clip test shape\n",
- "gdf_ind = gpd.read_file('../../datasets/raw/input_data_test/indonesia_test_shape_clip.shp')\n",
+ "# import indonesia clip test shape\n",
+ "gdf_ind = gpd.read_file(\"../../datasets/raw/input_data_test/indonesia_test_shape_clip.shp\")\n",
"gdf_ind"
]
},
@@ -642,8 +646,8 @@
}
],
"source": [
- "#set geom to epsg 4326 for summ stats\n",
- "gdf_ind = gdf_ind.to_crs('EPSG:4326')\n",
+ "# set geom to epsg 4326 for summ stats\n",
+ "gdf_ind = gdf_ind.to_crs(\"EPSG:4326\")\n",
"gdf_ind.crs"
]
},
@@ -668,8 +672,8 @@
}
],
"source": [
- "#get geometry to parse\n",
- "geom = gdf_ind.iloc[0]['geometry']\n",
+ "# get geometry to parse\n",
+ "geom = gdf_ind.iloc[0][\"geometry\"]\n",
"geom"
]
},
@@ -725,8 +729,8 @@
}
],
"source": [
- "#import world dataset\n",
- "gdf_world = gpd.read_file('../../datasets/raw/input_data_test/world_shape_simpl.shp')\n",
+ "# import world dataset\n",
+ "gdf_world = gpd.read_file(\"../../datasets/raw/input_data_test/world_shape_simpl.shp\")\n",
"gdf_world"
]
},
@@ -758,7 +762,7 @@
}
],
"source": [
- "#check crs of world geom\n",
+ "# check crs of world geom\n",
"gdf_world.crs"
]
},
@@ -783,7 +787,7 @@
}
],
"source": [
- "geom_world = gdf_world.iloc[0]['geometry']\n",
+ "geom_world = gdf_world.iloc[0][\"geometry\"]\n",
"geom_world"
]
},
@@ -803,12 +807,16 @@
"metadata": {},
"outputs": [],
"source": [
- "#rasters for testing calculations with different resolutions - need to be in epsg4326\n",
- "raster_path_30m = '../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018_new_extent_4326.tif'\n",
- "raster_path_10km = '../../datasets/processed/processed_data/risk_map/water_risk_cotton_4326_2000_v2.tif'\n",
+ "# rasters for testing calculations with different resolutions - need to be in epsg4326\n",
+ "raster_path_30m = \"../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018_new_extent_4326.tif\"\n",
+ "raster_path_10km = (\n",
+ " \"../../datasets/processed/processed_data/risk_map/water_risk_cotton_4326_2000_v2.tif\"\n",
+ ")\n",
"\n",
"\n",
- "raster_path_10km_3857 = '../../datasets/processed/processed_data/risk_map/water_risk_cotton_3857_2000_v2.tif'"
+ "raster_path_10km_3857 = (\n",
+ " \"../../datasets/processed/processed_data/risk_map/water_risk_cotton_3857_2000_v2.tif\"\n",
+ ")"
]
},
{
@@ -826,8 +834,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_5_res = get_h3_array(geom, raster_path_10km, 5, 'median', 'wr_cotton_')"
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_5_res = get_h3_array(geom, raster_path_10km, 5, \"median\", \"wr_cotton_\")"
]
},
{
@@ -837,8 +845,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./water_risk_res5.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./water_risk_res5.json\", \"w\") as f:\n",
" json.dump(array_5_res, f)"
]
},
@@ -857,8 +865,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_6_res = get_h3_array(geom, raster_path_10km, 6, 'median', 'wr_cotton_')"
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_6_res = get_h3_array(geom, raster_path_10km, 6, \"median\", \"wr_cotton_\")"
]
},
{
@@ -868,8 +876,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./water_risk_res6.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./water_risk_res6.json\", \"w\") as f:\n",
" json.dump(array_6_res, f)"
]
},
@@ -888,8 +896,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_6_res_mean = get_h3_array(geom, raster_path_10km, 6, 'mean', 'wr_cotton_')"
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_6_res_mean = get_h3_array(geom, raster_path_10km, 6, \"mean\", \"wr_cotton_\")"
]
},
{
@@ -899,8 +907,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./water_risk_res6_mean.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./water_risk_res6_mean.json\", \"w\") as f:\n",
" json.dump(array_6_res_mean, f)"
]
},
@@ -919,8 +927,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_6_res_sum = get_h3_array(geom, raster_path_10km, 6, 'sum', 'wr_cotton_')"
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_6_res_sum = get_h3_array(geom, raster_path_10km, 6, \"sum\", \"wr_cotton_\")"
]
},
{
@@ -930,8 +938,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./water_risk_res6_sum.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./water_risk_res6_sum.json\", \"w\") as f:\n",
" json.dump(array_6_res_sum, f)"
]
},
@@ -950,8 +958,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_6_res = get_h3_array(geom, raster_path_30m, 6, 'sum', 'wr_cotton_')"
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_6_res = get_h3_array(geom, raster_path_30m, 6, \"sum\", \"wr_cotton_\")"
]
},
{
@@ -961,8 +969,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./df_cotton_res6.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./df_cotton_res6.json\", \"w\") as f:\n",
" json.dump(array_6_res, f)"
]
},
@@ -981,8 +989,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_8_res = get_h3_array(geom, raster_path_30m, 8, 'sum', 'wr_cotton_')"
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_8_res = get_h3_array(geom, raster_path_30m, 8, \"sum\", \"wr_cotton_\")"
]
},
{
@@ -992,8 +1000,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./df_cotton_res8.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./df_cotton_res8.json\", \"w\") as f:\n",
" json.dump(array_8_res, f)"
]
},
@@ -1012,8 +1020,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_9_res = get_h3_array(geom, raster_path_30m, 9, 'sum', 'wr_cotton_')"
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_9_res = get_h3_array(geom, raster_path_30m, 9, \"sum\", \"wr_cotton_\")"
]
},
{
@@ -1023,8 +1031,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./df_cotton_res9.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./df_cotton_res9.json\", \"w\") as f:\n",
" json.dump(array_9_res, f)"
]
},
@@ -1043,9 +1051,9 @@
"metadata": {},
"outputs": [],
"source": [
- "#explore the res 9 but with weighted mean - difference with weighted sum\n",
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_9_res = get_h3_array(geom, raster_path_30m, 9, 'mean', 'wr_cotton_')"
+ "# explore the res 9 but with weighted mean - difference with weighted sum\n",
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_9_res = get_h3_array(geom, raster_path_30m, 9, \"mean\", \"wr_cotton_\")"
]
},
{
@@ -1055,8 +1063,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./df_cotton_res9_mean.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./df_cotton_res9_mean.json\", \"w\") as f:\n",
" json.dump(array_9_res, f)"
]
},
@@ -1075,8 +1083,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_6_res_world = get_h3_array(geom_world, raster_path_10km, 6, 'mean', 'wr_cotton_')"
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_6_res_world = get_h3_array(geom_world, raster_path_10km, 6, \"mean\", \"wr_cotton_\")"
]
},
{
@@ -1086,8 +1094,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./water_risk_cotton_res6_mean_global.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./water_risk_cotton_res6_mean_global.json\", \"w\") as f:\n",
" json.dump(array_6_res_world_3857, f)"
]
},
@@ -1098,7 +1106,7 @@
"metadata": {},
"outputs": [],
"source": [
- "array_6_res_world_clean = [el for el in array_6_res_world if el['sumStats'] != None]\n"
+ "array_6_res_world_clean = [el for el in array_6_res_world if el[\"sumStats\"] is not None]"
]
},
{
@@ -1108,7 +1116,7 @@
"metadata": {},
"outputs": [],
"source": [
- "with open('./water_risk_cotton_res6_mean_global_clean.json', 'w') as f:\n",
+ "with open(\"./water_risk_cotton_res6_mean_global_clean.json\", \"w\") as f:\n",
" json.dump(array_6_res_world_clean, f)"
]
},
@@ -1127,8 +1135,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_1_res_world = get_h3_array(geom_world, raster_path_10km, 1, 'sum', 'wr_cotton_')"
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_1_res_world = get_h3_array(geom_world, raster_path_10km, 1, \"sum\", \"wr_cotton_\")"
]
},
{
@@ -1138,8 +1146,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./water_risk_cotton_res1_mean_global.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./water_risk_cotton_res1_mean_global.json\", \"w\") as f:\n",
" json.dump(array_1_res_world, f)"
]
},
@@ -1158,8 +1166,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_3_res_world = get_h3_array(geom_world, raster_path_10km, 3, 'sum', 'wr_cotton_')"
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_3_res_world = get_h3_array(geom_world, raster_path_10km, 3, \"sum\", \"wr_cotton_\")"
]
},
{
@@ -1169,8 +1177,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./water_risk_cotton_res3_mean_global.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./water_risk_cotton_res3_mean_global.json\", \"w\") as f:\n",
" json.dump(array_3_res_world, f)"
]
},
@@ -1189,8 +1197,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_5_res_world = get_h3_array(geom_world, raster_path_10km, 5, 'sum', 'wr_cotton_')"
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_5_res_world = get_h3_array(geom_world, raster_path_10km, 5, \"sum\", \"wr_cotton_\")"
]
},
{
@@ -1200,8 +1208,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./water_risk_cotton_res5_sum_global.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./water_risk_cotton_res5_sum_global.json\", \"w\") as f:\n",
" json.dump(array_5_res_world, f)"
]
},
@@ -1212,9 +1220,9 @@
"metadata": {},
"outputs": [],
"source": [
- "#clean none from res 5\n",
+ "# clean none from res 5\n",
"\n",
- "with open('./water_risk_cotton_res5_mean_global.json', 'r') as f:\n",
+ "with open(\"./water_risk_cotton_res5_mean_global.json\", \"r\") as f:\n",
" array_5_res_world = json.load(f)"
]
},
@@ -1225,7 +1233,7 @@
"metadata": {},
"outputs": [],
"source": [
- "array_5_res_world = [el for el in array_5_res_world if el['sumStats'] != None]"
+ "array_5_res_world = [el for el in array_5_res_world if el[\"sumStats\"] is not None]"
]
},
{
@@ -1243,8 +1251,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#get h3 array for resolution 5 and raster of 10km resolution\n",
- "array_6_res_world = get_h3_array(geom_world, raster_path_10km, 6, 'sum', 'wr_cotton_')"
+ "# get h3 array for resolution 5 and raster of 10km resolution\n",
+ "array_6_res_world = get_h3_array(geom_world, raster_path_10km, 6, \"sum\", \"wr_cotton_\")"
]
},
{
@@ -1254,8 +1262,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#export json\n",
- "with open('./water_risk_cotton_res6_sum_global.json', 'w') as f:\n",
+ "# export json\n",
+ "with open(\"./water_risk_cotton_res6_sum_global.json\", \"w\") as f:\n",
" json.dump(array_6_res_world, f)"
]
},
@@ -1266,7 +1274,7 @@
"metadata": {},
"outputs": [],
"source": [
- "with open('./water_risk_cotton_res6_sum_global.json', 'r') as f:\n",
+ "with open(\"./water_risk_cotton_res6_sum_global.json\", \"r\") as f:\n",
" array_6_res_world = json.load(f)"
]
},
@@ -1277,7 +1285,7 @@
"metadata": {},
"outputs": [],
"source": [
- "array_6_res_world = [el for el in array_6_res_world if el['sumStats'] != None]\n"
+ "array_6_res_world = [el for el in array_6_res_world if el[\"sumStats\"] is not None]"
]
},
{
@@ -1307,10 +1315,7 @@
"source": [
"_array = []\n",
"for feature in generator:\n",
- " element = {\n",
- " 'hexId':feature['properties']['hexid'], \n",
- " 'geometry':feature['geometry']\n",
- " }\n",
+ " element = {\"hexId\": feature[\"properties\"][\"hexid\"], \"geometry\": feature[\"geometry\"]}\n",
" _array.append(element)"
]
},
@@ -1401,12 +1406,11 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"geometries = []\n",
- "for i,row in gdf.iterrows():\n",
- " geom = shape(row['geometry'])\n",
+ "for i, row in gdf.iterrows():\n",
+ " geom = shape(row[\"geometry\"])\n",
" geometries.append(geom)\n",
- "gdf['geometry']=geometries"
+ "gdf[\"geometry\"] = geometries"
]
},
{
@@ -1416,7 +1420,7 @@
"metadata": {},
"outputs": [],
"source": [
- "gdf = gdf.set_geometry('geometry')\n",
+ "gdf = gdf.set_geometry(\"geometry\")\n",
"gdf.crs"
]
},
@@ -1496,8 +1500,8 @@
}
],
"source": [
- "gdf = gdf.set_crs('EPSG:4326')\n",
- "gdf = gdf.to_crs('EPSG:3857')\n",
+ "gdf = gdf.set_crs(\"EPSG:4326\")\n",
+ "gdf = gdf.to_crs(\"EPSG:3857\")\n",
"gdf.head()"
]
},
@@ -1508,8 +1512,7 @@
"metadata": {},
"outputs": [],
"source": [
- "gdf.to_file('./world_geom_3857.json',\n",
- " driver='GeoJSON')"
+ "gdf.to_file(\"./world_geom_3857.json\", driver=\"GeoJSON\")"
]
},
{
@@ -1527,7 +1530,7 @@
"metadata": {},
"outputs": [],
"source": [
- "with open('./water_risk_cotton_res6_mean_global_clean.json', 'r') as f:\n",
+ "with open(\"./water_risk_cotton_res6_mean_global_clean.json\", \"r\") as f:\n",
" array_6_res_world_clean = json.load(f)"
]
},
@@ -1619,8 +1622,8 @@
"outputs": [],
"source": [
"geometries = []\n",
- "for i,row in gdf.iterrows():\n",
- " hexid = row['hexId']\n",
+ "for i, row in gdf.iterrows():\n",
+ " hexid = row[\"hexId\"]\n",
" coords = h3.h3_set_to_multi_polygon([hexid], geo_json=True)\n",
" geom_feature = {\"type\": \"Polygon\", \"coordinates\": coords[0]}\n",
" geom = shape(geom_feature)\n",
@@ -1716,8 +1719,8 @@
}
],
"source": [
- "#append geometry in epsg4326\n",
- "gdf['geometry']= geometries\n",
+ "# append geometry in epsg4326\n",
+ "gdf[\"geometry\"] = geometries\n",
"gdf.head()"
]
},
@@ -1728,7 +1731,7 @@
"metadata": {},
"outputs": [],
"source": [
- "gdf = gdf.set_geometry('geometry')\n",
+ "gdf = gdf.set_geometry(\"geometry\")\n",
"gdf.crs"
]
},
@@ -1763,9 +1766,9 @@
}
],
"source": [
- "#set crs to epsg4326 and reproject to epsg3857\n",
- "gdf = gdf.set_crs('EPSG:4326')\n",
- "gdf = gdf.to_crs('EPSG:3857')\n",
+ "# set crs to epsg4326 and reproject to epsg3857\n",
+ "gdf = gdf.set_crs(\"EPSG:4326\")\n",
+ "gdf = gdf.to_crs(\"EPSG:3857\")\n",
"gdf.crs"
]
},
@@ -1776,9 +1779,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#save as json\n",
- "gdf.to_file('./water_risk_cotton_res6_mean_global_clean_3857.json',\n",
- " driver='GeoJSON')"
+ "# save as json\n",
+ "gdf.to_file(\"./water_risk_cotton_res6_mean_global_clean_3857.json\", driver=\"GeoJSON\")"
]
},
{
@@ -1796,7 +1798,7 @@
"metadata": {},
"outputs": [],
"source": [
- "with open('./water_risk_cotton_res6_mean_global_clean.json', 'r') as f:\n",
+ "with open(\"./water_risk_cotton_res6_mean_global_clean.json\", \"r\") as f:\n",
" array_6_res_world_clean = json.load(f)"
]
},
@@ -1888,10 +1890,10 @@
"outputs": [],
"source": [
"geometries = []\n",
- "for i,row in gdf.iterrows():\n",
- " hexid = row['hexId']\n",
+ "for i, row in gdf.iterrows():\n",
+ " hexid = row[\"hexId\"]\n",
" centroid = h3.h3_to_geo(hexid)\n",
- " point = Point(centroid[1],centroid[0])\n",
+ " point = Point(centroid[1], centroid[0])\n",
" geometries.append(point)"
]
},
@@ -1977,7 +1979,7 @@
}
],
"source": [
- "gdf['geometry'] = geometries\n",
+ "gdf[\"geometry\"] = geometries\n",
"gdf.head()"
]
},
@@ -1988,8 +1990,8 @@
"metadata": {},
"outputs": [],
"source": [
- "gdf = gdf.set_geometry('geometry')\n",
- "gdf = gdf.set_crs('EPSG:4326')"
+ "gdf = gdf.set_geometry(\"geometry\")\n",
+ "gdf = gdf.set_crs(\"EPSG:4326\")"
]
},
{
@@ -1999,8 +2001,7 @@
"metadata": {},
"outputs": [],
"source": [
- "gdf.to_file('./water_risk_cotton_res6_mean_global_clean_point.json',\n",
- " driver='GeoJSON')"
+ "gdf.to_file(\"./water_risk_cotton_res6_mean_global_clean_point.json\", driver=\"GeoJSON\")"
]
},
{
@@ -2152,7 +2153,7 @@
],
"source": [
"## import user data\n",
- "user_data = gpd.read_file('../../datasets/processed/user_data/located_lg_data_polygon_v2.shp')\n",
+ "user_data = gpd.read_file(\"../../datasets/processed/user_data/located_lg_data_polygon_v2.shp\")\n",
"user_data.head()"
]
},
@@ -2184,7 +2185,7 @@
}
],
"source": [
- "user_data[user_data['Material']=='Cotton'].iloc[0]"
+ "user_data[user_data[\"Material\"] == \"Cotton\"].iloc[0]"
]
},
{
@@ -2194,9 +2195,9 @@
"metadata": {},
"outputs": [],
"source": [
- "#check with one location\n",
- "geom = user_data[user_data['Material']=='Cotton'].iloc[0]['geometry']\n",
- "generator = generate_h3_features(geom, 6)\n"
+ "# check with one location\n",
+ "geom = user_data[user_data[\"Material\"] == \"Cotton\"].iloc[0][\"geometry\"]\n",
+ "generator = generate_h3_features(geom, 6)"
]
},
{
@@ -2206,9 +2207,10 @@
"metadata": {},
"outputs": [],
"source": [
- "test_china = [{'volume':2400,\n",
- " 'hexid':feature['properties']['hexid'],\n",
- " 'geometry':feature['geometry']} for feature in generator]"
+ "test_china = [\n",
+ " {\"volume\": 2400, \"hexid\": feature[\"properties\"][\"hexid\"], \"geometry\": feature[\"geometry\"]}\n",
+ " for feature in generator\n",
+ "]"
]
},
{
@@ -2238,15 +2240,15 @@
"outputs": [],
"source": [
"harvest_area_fraction_raster = {\n",
- " 'Rubber': '../../datasets/raw/crop_data/rubber/rubber_HarvestedAreaFraction.tif',\n",
- " 'Cotton': '../../datasets/raw/crop_data/cotton/cotton_HarvestedAreaFraction.tif',\n",
- " 'Leather': '../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/asture2000_5m_ext_v2.tif'\n",
+ " \"Rubber\": \"../../datasets/raw/crop_data/rubber/rubber_HarvestedAreaFraction.tif\",\n",
+ " \"Cotton\": \"../../datasets/raw/crop_data/cotton/cotton_HarvestedAreaFraction.tif\",\n",
+ " \"Leather\": \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/asture2000_5m_ext_v2.tif\",\n",
"}\n",
"\n",
"yield_raster = {\n",
- " 'Rubber': '../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare.tif',\n",
- " 'Cotton': '../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare.tif',\n",
- " 'Leather': '../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v5_NoZeros.tif'\n",
+ " \"Rubber\": \"../../datasets/raw/crop_data/rubber/rubber_YieldPerHectare.tif\",\n",
+ " \"Cotton\": \"../../datasets/raw/crop_data/cotton/cotton_YieldPerHectare.tif\",\n",
+ " \"Leather\": \"../../datasets/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m_yield_v5_NoZeros.tif\",\n",
"}"
]
},
@@ -2273,11 +2275,11 @@
}
],
"source": [
- "#zonal stats for harvest area\n",
+ "# zonal stats for harvest area\n",
"start_time = time.time()\n",
- "material = user_data.iloc[0]['Material']\n",
+ "material = user_data.iloc[0][\"Material\"]\n",
"raster_path_ha = harvest_area_fraction_raster[material]\n",
- "_array_ha = get_h3_array(geom, raster_path_ha, 6, 'mean', 'ha')\n",
+ "_array_ha = get_h3_array(geom, raster_path_ha, 6, \"mean\", \"ha\")\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -2288,11 +2290,11 @@
"metadata": {},
"outputs": [],
"source": [
- "#parse harvest area fraction to h3 index\n",
+ "# parse harvest area fraction to h3 index\n",
"for el in test_china:\n",
- " harvest_area_list = [ha['sumStats'] for ha in _array_ha if ha['hexId'] == el['hexid']]\n",
- " harvest_area = harvest_area_list[0] if len(harvest_area_list)>0 else 0\n",
- " el['ha']=harvest_area"
+ " harvest_area_list = [ha[\"sumStats\"] for ha in _array_ha if ha[\"hexId\"] == el[\"hexid\"]]\n",
+ " harvest_area = harvest_area_list[0] if len(harvest_area_list) > 0 else 0\n",
+ " el[\"ha\"] = harvest_area"
]
},
{
@@ -2303,8 +2305,10 @@
"outputs": [],
"source": [
"# export unique user data\n",
- "with open('../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha.json', 'w') as f:\n",
- " json.dump(test_china,f)"
+ "with open(\n",
+ " \"../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha.json\", \"w\"\n",
+ ") as f:\n",
+ " json.dump(test_china, f)"
]
},
{
@@ -2314,7 +2318,9 @@
"metadata": {},
"outputs": [],
"source": [
- "with open('../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha.json','r') as f:\n",
+ "with open(\n",
+ " \"../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha.json\", \"r\"\n",
+ ") as f:\n",
" test_china = json.load(f)"
]
},
@@ -2325,15 +2331,15 @@
"metadata": {},
"outputs": [],
"source": [
- "#get sum of haf to calculate probability area distribution\n",
+ "# get sum of haf to calculate probability area distribution\n",
"\n",
- "total_ha = sum([el['ha'] for el in test_china])\n",
+ "total_ha = sum([el[\"ha\"] for el in test_china])\n",
"\n",
- "#calculate probability area\n",
+ "# calculate probability area\n",
"\n",
"for el in test_china:\n",
- " p_dis = float((el['ha']*el['volume'])/total_ha) \n",
- " el['p_dis']=p_dis"
+ " p_dis = float((el[\"ha\"] * el[\"volume\"]) / total_ha)\n",
+ " el[\"p_dis\"] = p_dis"
]
},
{
@@ -2343,8 +2349,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#remove 0\n",
- "test_china = [el for el in test_china if el['p_dis'] !=0]"
+ "# remove 0\n",
+ "test_china = [el for el in test_china if el[\"p_dis\"] != 0]"
]
},
{
@@ -2355,8 +2361,11 @@
"outputs": [],
"source": [
"# export unique user data\n",
- "with open('../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha_pdis.json', 'w') as f:\n",
- " json.dump(test_china,f)\n"
+ "with open(\n",
+ " \"../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha_pdis.json\",\n",
+ " \"w\",\n",
+ ") as f:\n",
+ " json.dump(test_china, f)"
]
},
{
@@ -2367,8 +2376,11 @@
"outputs": [],
"source": [
"# export unique user data\n",
- "with open('../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha_pdis.json', 'r') as f:\n",
- " test_china = json.load(f)\n"
+ "with open(\n",
+ " \"../../datasets/processed/user_data/located_lg_data_polygon_v2_h3_unique_china_ha_pdis.json\",\n",
+ " \"r\",\n",
+ ") as f:\n",
+ " test_china = json.load(f)"
]
},
{
@@ -2455,8 +2467,10 @@
}
],
"source": [
- "#risk map in h3 - water risk cotton\n",
- "cotton_water_risk = pd.read_json('../../datasets/processed/water_indicators/water_risk_cotton_res6_mean_global_clean.json')\n",
+ "# risk map in h3 - water risk cotton\n",
+ "cotton_water_risk = pd.read_json(\n",
+ " \"../../datasets/processed/water_indicators/water_risk_cotton_res6_mean_global_clean.json\"\n",
+ ")\n",
"cotton_water_risk.head()"
]
},
@@ -2680,8 +2694,10 @@
}
],
"source": [
- "#calculation of metric\n",
- "merge_df = pd.merge(user_data_china,cotton_water_risk, how= 'inner', left_on='hexid', right_on='hexId')\n",
+ "# calculation of metric\n",
+ "merge_df = pd.merge(\n",
+ " user_data_china, cotton_water_risk, how=\"inner\", left_on=\"hexid\", right_on=\"hexId\"\n",
+ ")\n",
"merge_df.head()"
]
},
@@ -2692,16 +2708,16 @@
"metadata": {},
"outputs": [],
"source": [
- "#save as json\n",
+ "# save as json\n",
"china_impact = []\n",
- "for i,row in merge_df.iterrows():\n",
+ "for i, row in merge_df.iterrows():\n",
" element = {\n",
- " 'volume':row['volume'],\n",
- " 'hexid':row['hexid'],\n",
- " 'geometry':row['geometry'],\n",
- " 'impact':float(row['p_dis']*row['sumStats'])\n",
+ " \"volume\": row[\"volume\"],\n",
+ " \"hexid\": row[\"hexid\"],\n",
+ " \"geometry\": row[\"geometry\"],\n",
+ " \"impact\": float(row[\"p_dis\"] * row[\"sumStats\"]),\n",
" }\n",
- " china_impact.append(element)\n"
+ " china_impact.append(element)"
]
},
{
@@ -2711,8 +2727,8 @@
"metadata": {},
"outputs": [],
"source": [
- "with open('../../datasets/processed/water_indicators/water impact_china_h3.json','w') as f:\n",
- " json.dump(china_impact,f)"
+ "with open(\"../../datasets/processed/water_indicators/water impact_china_h3.json\", \"w\") as f:\n",
+ " json.dump(china_impact, f)"
]
},
{
@@ -2722,7 +2738,7 @@
"metadata": {},
"outputs": [],
"source": [
- "with open('../../datasets/processed/water_indicators/water impact_china_h3.json', 'r') as f:\n",
+ "with open(\"../../datasets/processed/water_indicators/water impact_china_h3.json\", \"r\") as f:\n",
" china_test = json.load(f)"
]
},
@@ -2920,7 +2936,7 @@
}
],
"source": [
- "gdf['geometry'] = [shape(row['geometry']) for i,row in gdf.iterrows()]\n",
+ "gdf[\"geometry\"] = [shape(row[\"geometry\"]) for i, row in gdf.iterrows()]\n",
"gdf.head()"
]
},
@@ -2931,9 +2947,7 @@
"metadata": {},
"outputs": [],
"source": [
- "gdf.to_file(\n",
- " './china_test.shp',\n",
- " driver='ESRI Shapefile')"
+ "gdf.to_file(\"./china_test.shp\", driver=\"ESRI Shapefile\")"
]
},
{
@@ -3003,11 +3017,11 @@
"mu, sigma = 0, 0.5\n",
"\n",
"measured = np.random.normal(mu, sigma, 1000)\n",
- "hist, edges = np.histogram(gdf['impact'], density=True, bins=50)\n",
+ "hist, edges = np.histogram(gdf[\"impact\"], density=True, bins=50)\n",
"\n",
"x = np.linspace(-2, 2, 1000)\n",
- "pdf = 1/(sigma * np.sqrt(2*np.pi)) * np.exp(-(x-mu)**2 / (2*sigma**2))\n",
- "cdf = (1+scipy.special.erf((x-mu)/np.sqrt(2*sigma**2)))/2\n",
+ "pdf = 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-((x - mu) ** 2) / (2 * sigma**2))\n",
+ "cdf = (1 + scipy.special.erf((x - mu) / np.sqrt(2 * sigma**2))) / 2\n",
"\n",
"p1 = make_plot(\"Normal Distribution (μ=0, σ=0.5)\", hist, edges, x, pdf, cdf)\n",
"\n",
@@ -3016,11 +3030,11 @@
"mu, sigma = 0, 0.5\n",
"\n",
"measured = np.random.lognormal(mu, sigma, 1000)\n",
- "hist, edges = np.histogram(gdf['impact'], density=True, bins=50)\n",
+ "hist, edges = np.histogram(gdf[\"impact\"], density=True, bins=50)\n",
"\n",
"x = np.linspace(0.0001, 8.0, 1000)\n",
- "pdf = 1/(x* sigma * np.sqrt(2*np.pi)) * np.exp(-(np.log(x)-mu)**2 / (2*sigma**2))\n",
- "cdf = (1+scipy.special.erf((np.log(x)-mu)/(np.sqrt(2)*sigma)))/2\n",
+ "pdf = 1 / (x * sigma * np.sqrt(2 * np.pi)) * np.exp(-((np.log(x) - mu) ** 2) / (2 * sigma**2))\n",
+ "cdf = (1 + scipy.special.erf((np.log(x) - mu) / (np.sqrt(2) * sigma))) / 2\n",
"\n",
"p2 = make_plot(\"Log Normal Distribution (μ=0, σ=0.5)\", hist, edges, x, pdf, cdf)\n",
"\n",
@@ -3029,29 +3043,29 @@
"k, theta = 7.5, 1.0\n",
"\n",
"measured = np.random.gamma(k, theta, 1000)\n",
- "hist, edges = np.histogram(gdf['impact'], density=True, bins=50)\n",
+ "hist, edges = np.histogram(gdf[\"impact\"], density=True, bins=50)\n",
"\n",
"x = np.linspace(0.0001, 20.0, 1000)\n",
- "pdf = x**(k-1) * np.exp(-x/theta) / (theta**k * scipy.special.gamma(k))\n",
- "cdf = scipy.special.gammainc(k, x/theta)\n",
+ "pdf = x ** (k - 1) * np.exp(-x / theta) / (theta**k * scipy.special.gamma(k))\n",
+ "cdf = scipy.special.gammainc(k, x / theta)\n",
"\n",
"p3 = make_plot(\"Gamma Distribution (k=7.5, θ=1)\", hist, edges, x, pdf, cdf)\n",
"\n",
"# Weibull Distribution\n",
"\n",
"lam, k = 1, 1.25\n",
- "measured = lam*(-np.log(np.random.uniform(0, 1, 1000)))**(1/k)\n",
- "hist, edges = np.histogram(gdf['impact'], density=True, bins=50)\n",
+ "measured = lam * (-np.log(np.random.uniform(0, 1, 1000))) ** (1 / k)\n",
+ "hist, edges = np.histogram(gdf[\"impact\"], density=True, bins=50)\n",
"\n",
"x = np.linspace(0.0001, 8, 1000)\n",
- "pdf = (k/lam)*(x/lam)**(k-1) * np.exp(-(x/lam)**k)\n",
- "cdf = 1 - np.exp(-(x/lam)**k)\n",
+ "pdf = (k / lam) * (x / lam) ** (k - 1) * np.exp(-((x / lam) ** k))\n",
+ "cdf = 1 - np.exp(-((x / lam) ** k))\n",
"\n",
"p4 = make_plot(\"Weibull Distribution (λ=1, k=1.25)\", hist, edges, x, pdf, cdf)\n",
"\n",
- "output_file('histogram.html', title=\"histogram.py example\")\n",
+ "output_file(\"histogram.html\", title=\"histogram.py example\")\n",
"\n",
- "show(gridplot([p1,p2,p3,p4], ncols=2, plot_width=400, plot_height=400, toolbar_location=None))"
+ "show(gridplot([p1, p2, p3, p4], ncols=2, plot_width=400, plot_height=400, toolbar_location=None))"
]
},
{
@@ -3070,8 +3084,9 @@
],
"source": [
"# using this h3 methodology - the total impact would be the sum of the distributed impacts\n",
- "print(f\"The unsustainable water use impact for buying 2400 tonnes of cotton in China would be {sum(gdf['impact'])} m3 / year\")\n",
- "\n"
+ "print(\n",
+ " f\"The unsustainable water use impact for buying 2400 tonnes of cotton in China would be {sum(gdf['impact'])} m3 / year\"\n",
+ ")"
]
},
{
@@ -3236,9 +3251,9 @@
}
],
"source": [
- "# download projection over time - \n",
+ "# download projection over time -\n",
"\n",
- "ha_00_19 = pd.read_csv('../../datasets/raw/crop_data/FAOSTAT_ha_2000_2019.csv')\n",
+ "ha_00_19 = pd.read_csv(\"../../datasets/raw/crop_data/FAOSTAT_ha_2000_2019.csv\")\n",
"ha_00_19.head()"
]
},
@@ -3260,7 +3275,7 @@
}
],
"source": [
- "ha_00_19[(ha_00_19['Year']==2000) & (ha_00_19['Area']=='Afghanistan')]['Value'][0]"
+ "ha_00_19[(ha_00_19[\"Year\"] == 2000) & (ha_00_19[\"Area\"] == \"Afghanistan\")][\"Value\"][0]"
]
},
{
@@ -3275,21 +3290,19 @@
"\n",
"ha_byYear = []\n",
"for country in unique_countries:\n",
- " element = {\n",
- " 'country': country\n",
- " }\n",
+ " element = {\"country\": country}\n",
" for year in unique_years:\n",
" try:\n",
- " value = float(list(ha_00_19[(ha_00_19['Area']==country) & (ha_00_19['Year']==year)]['Value'])[0])\n",
+ " value = float(\n",
+ " list(ha_00_19[(ha_00_19[\"Area\"] == country) & (ha_00_19[\"Year\"] == year)][\"Value\"])[\n",
+ " 0\n",
+ " ]\n",
+ " )\n",
" except:\n",
" value = 0\n",
- " \n",
- " \n",
- " element[f'{year}'] = value\n",
- " ha_byYear.append(element)\n",
- " \n",
- " \n",
- " "
+ "\n",
+ " element[f\"{year}\"] = value\n",
+ " ha_byYear.append(element)"
]
},
{
@@ -3710,12 +3723,33 @@
}
],
"source": [
- "pct_change_df = ha_df[['2000', '2001', '2002', '2003', '2004', '2005', '2006',\n",
- " '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015',\n",
- " '2016', '2017', '2018', '2019']].pct_change(axis=1)\n",
+ "pct_change_df = ha_df[\n",
+ " [\n",
+ " \"2000\",\n",
+ " \"2001\",\n",
+ " \"2002\",\n",
+ " \"2003\",\n",
+ " \"2004\",\n",
+ " \"2005\",\n",
+ " \"2006\",\n",
+ " \"2007\",\n",
+ " \"2008\",\n",
+ " \"2009\",\n",
+ " \"2010\",\n",
+ " \"2011\",\n",
+ " \"2012\",\n",
+ " \"2013\",\n",
+ " \"2014\",\n",
+ " \"2015\",\n",
+ " \"2016\",\n",
+ " \"2017\",\n",
+ " \"2018\",\n",
+ " \"2019\",\n",
+ " ]\n",
+ "].pct_change(axis=1)\n",
"\n",
- "#add countries\n",
- "pct_change_df['country']=ha_df['country']\n",
+ "# add countries\n",
+ "pct_change_df[\"country\"] = ha_df[\"country\"]\n",
"pct_change_df.head()"
]
},
@@ -3726,7 +3760,7 @@
"metadata": {},
"outputs": [],
"source": [
- "pct_change_df.to_csv('../../datasets/raw/crop_data/projection_factor_byCountry.csv')"
+ "pct_change_df.to_csv(\"../../datasets/raw/crop_data/projection_factor_byCountry.csv\")"
]
},
{
@@ -3840,9 +3874,9 @@
}
],
"source": [
- "#filter by china mainland\n",
- "pct_change_china = pct_change_df[pct_change_df['country']=='China, mainland']\n",
- "pct_change_china['2000']=0\n",
+ "# filter by china mainland\n",
+ "pct_change_china = pct_change_df[pct_change_df[\"country\"] == \"China, mainland\"]\n",
+ "pct_change_china[\"2000\"] = 0\n",
"pct_change_china"
]
},
@@ -3864,7 +3898,7 @@
}
],
"source": [
- "pct_change_china['2001'].iloc[0]"
+ "pct_change_china[\"2001\"].iloc[0]"
]
},
{
@@ -3961,40 +3995,51 @@
}
],
"source": [
- "#to json\n",
+ "# to json\n",
"pct_change_china_json = {}\n",
"for el in pct_change_china.columns:\n",
- " if el != 'country':\n",
- " pct_change_china_json[el]=pct_change_china[el].iloc[0]\n",
+ " if el != \"country\":\n",
+ " pct_change_china_json[el] = pct_change_china[el].iloc[0]\n",
"\n",
- "#total_volume is 2400\n",
+ "# total_volume is 2400\n",
"total_vol = 2400\n",
- "#value is going to be (2000val + (factor*2000val))\n",
- "#project average\n",
- "average_risk = sum(merge_df['sumStats'])/len(merge_df['sumStats'])\n",
- "pr_average_imp = [(average_risk + pct_change_china_json[f'{year}']*average_risk)*total_vol for year in range(2000,2020)]\n",
+ "# value is going to be (2000val + (factor*2000val))\n",
+ "# project average\n",
+ "average_risk = sum(merge_df[\"sumStats\"]) / len(merge_df[\"sumStats\"])\n",
+ "pr_average_imp = [\n",
+ " (average_risk + pct_change_china_json[f\"{year}\"] * average_risk) * total_vol\n",
+ " for year in range(2000, 2020)\n",
+ "]\n",
"\n",
- "#project min\n",
- "min_risk = min(merge_df['sumStats'])\n",
- "pr_min_imp = [(min_risk + pct_change_china_json[f'{year}']*min_risk)*total_vol for year in range(2000,2020)]\n",
+ "# project min\n",
+ "min_risk = min(merge_df[\"sumStats\"])\n",
+ "pr_min_imp = [\n",
+ " (min_risk + pct_change_china_json[f\"{year}\"] * min_risk) * total_vol\n",
+ " for year in range(2000, 2020)\n",
+ "]\n",
"\n",
- "#project max\n",
- "max_risk = max(merge_df['sumStats'])\n",
- "pr_max_imp = [(max_risk + pct_change_china_json[f'{year}']*max_risk)*total_vol for year in range(2000,2020)]\n",
+ "# project max\n",
+ "max_risk = max(merge_df[\"sumStats\"])\n",
+ "pr_max_imp = [\n",
+ " (max_risk + pct_change_china_json[f\"{year}\"] * max_risk) * total_vol\n",
+ " for year in range(2000, 2020)\n",
+ "]\n",
"\n",
"\n",
- "#project sum\n",
- "total_impact = sum(gdf['impact']) \n",
- "pr_total_imp = [(total_impact + pct_change_china_json[f'{year}']*total_impact) for year in range(2000,2020)]\n",
+ "# project sum\n",
+ "total_impact = sum(gdf[\"impact\"])\n",
+ "pr_total_imp = [\n",
+ " (total_impact + pct_change_china_json[f\"{year}\"] * total_impact) for year in range(2000, 2020)\n",
+ "]\n",
"\n",
"\n",
- "#generate dataframe\n",
+ "# generate dataframe\n",
"df = pd.DataFrame()\n",
- "df['year']=[year for year in range(2000,2020)]\n",
- "df['average_imp']=pr_average_imp\n",
- "df['min_imp']=pr_min_imp\n",
- "df['max_imp']=pr_max_imp\n",
- "df['total_imp']=pr_total_imp\n",
+ "df[\"year\"] = [year for year in range(2000, 2020)]\n",
+ "df[\"average_imp\"] = pr_average_imp\n",
+ "df[\"min_imp\"] = pr_min_imp\n",
+ "df[\"max_imp\"] = pr_max_imp\n",
+ "df[\"total_imp\"] = pr_total_imp\n",
"df.head()"
]
},
@@ -4070,19 +4115,26 @@
}
],
"source": [
- "df['year'] = pd.to_datetime(df['year'], format='%Y')\n",
+ "df[\"year\"] = pd.to_datetime(df[\"year\"], format=\"%Y\")\n",
"\n",
"source = ColumnDataSource(df)\n",
"\n",
"p = figure(x_axis_type=\"datetime\")\n",
"\n",
- "p.line(x='year', y='average_imp', line_width=2, source=source, legend='Average impact')\n",
- "p.line(x='year', y='min_imp', line_width=2, source=source, color=Spectral10[5], legend='Min impact')\n",
- "p.line(x='year', y='max_imp', line_width=2, source=source, color=Spectral10[9], legend='Max impact')\n",
- "p.line(x='year', y='total_imp', line_width=2, source=source, color=Spectral10[6], legend='Total impacts')\n",
+ "p.line(x=\"year\", y=\"average_imp\", line_width=2, source=source, legend=\"Average impact\")\n",
+ "p.line(x=\"year\", y=\"min_imp\", line_width=2, source=source, color=Spectral10[5], legend=\"Min impact\")\n",
+ "p.line(x=\"year\", y=\"max_imp\", line_width=2, source=source, color=Spectral10[9], legend=\"Max impact\")\n",
+ "p.line(\n",
+ " x=\"year\",\n",
+ " y=\"total_imp\",\n",
+ " line_width=2,\n",
+ " source=source,\n",
+ " color=Spectral10[6],\n",
+ " legend=\"Total impacts\",\n",
+ ")\n",
"\n",
- "p.title.text = 'Unsustainable water use impacts for Cotton in China'\n",
- "p.yaxis.axis_label = 'm3 / year'\n",
+ "p.title.text = \"Unsustainable water use impacts for Cotton in China\"\n",
+ "p.yaxis.axis_label = \"m3 / year\"\n",
"show(p)"
]
},
diff --git a/data/notebooks/Lab/0_4_H3_data_processing.ipynb b/data/notebooks/Lab/0_4_H3_data_processing.ipynb
index 299a8766b..15341d9c3 100644
--- a/data/notebooks/Lab/0_4_H3_data_processing.ipynb
+++ b/data/notebooks/Lab/0_4_H3_data_processing.ipynb
@@ -21,11 +21,12 @@
"outputs": [],
"source": [
"# import libraries\n",
- "import pandas as pd\n",
+ "import json\n",
+ "\n",
"import geopandas as gpd\n",
"import h3\n",
- "import json\n",
- "from shapely.geometry import shape, mapping"
+ "import pandas as pd\n",
+ "from shapely.geometry import shape"
]
},
{
@@ -34,11 +35,10 @@
"metadata": {},
"outputs": [],
"source": [
+ "import matplotlib.pyplot as plt\n",
"import rasterio as rio\n",
"import rasterio.plot\n",
- "import matplotlib.pyplot as plt\n",
- "from rasterstats import gen_zonal_stats, gen_point_query\n",
- "from rasterstats import zonal_stats"
+ "from rasterstats import gen_point_query, gen_zonal_stats, zonal_stats"
]
},
{
@@ -50,15 +50,6 @@
"import time"
]
},
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "from processing.geolocating_data import GeolocateAddress"
- ]
- },
{
"cell_type": "code",
"execution_count": 5,
@@ -190,9 +181,8 @@
],
"source": [
"# import user located data\n",
- "user_data_polygon = gpd.read_file('../../datasets/processed/located_lg_data_polygon_v2.shp')\n",
- "user_data_polygon.head()\n",
- "\n"
+ "user_data_polygon = gpd.read_file(\"../../datasets/processed/located_lg_data_polygon_v2.shp\")\n",
+ "user_data_polygon.head()"
]
},
{
@@ -212,7 +202,7 @@
}
],
"source": [
- "set(list(user_data_polygon['Material']))"
+ "set(list(user_data_polygon[\"Material\"]))"
]
},
{
@@ -242,7 +232,7 @@
}
],
"source": [
- "user_data_polygon[user_data_polygon['Material']=='Cotton'].iloc[4]"
+ "user_data_polygon[user_data_polygon[\"Material\"] == \"Cotton\"].iloc[4]"
]
},
{
@@ -332,8 +322,8 @@
}
],
"source": [
- "#lest select just one admin level 3 of india to reduce the computational time - this has been obtained from gadm\n",
- "polygon_gdf = gpd.read_file('../../datasets/raw/Punjab_adm.shp')\n",
+ "# lest select just one admin level 3 of india to reduce the computational time - this has been obtained from gadm\n",
+ "polygon_gdf = gpd.read_file(\"../../datasets/raw/Punjab_adm.shp\")\n",
"polygon_gdf"
]
},
@@ -357,9 +347,9 @@
}
],
"source": [
- "#select test location to perform calculations\n",
+ "# select test location to perform calculations\n",
"\n",
- "polygon = polygon_gdf.iloc[0]['geometry']\n",
+ "polygon = polygon_gdf.iloc[0][\"geometry\"]\n",
"polygon"
]
},
@@ -544,7 +534,7 @@
"source": [
"## import basins shapefile to test with front\n",
"\n",
- "basins = gpd.read_file('../../datasets/raw/basins_test_polygon.shp')\n",
+ "basins = gpd.read_file(\"../../datasets/raw/basins_test_polygon.shp\")\n",
"basins.head()"
]
},
@@ -568,7 +558,7 @@
}
],
"source": [
- "basins['geometry'][0]"
+ "basins[\"geometry\"][0]"
]
},
{
@@ -597,15 +587,15 @@
}
],
"source": [
- "#import blue water footprint cotton\n",
- "with rio.open('../../datasets/raw/wfbl_mmyr_4326_cotton.tif') as src:\n",
+ "# import blue water footprint cotton\n",
+ "with rio.open(\"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\") as src:\n",
" dat = src.read(1)\n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
- " ax.set_ylim((29.5,30.5))\n",
- " ax.set_xlim((75,76))\n",
- " rio.plot.show(dat, vmin=0, vmax=444, cmap='Blues', ax=ax, transform=src.transform)\n",
- " polygon_gdf.plot(ax=ax, alpha=0.5, edgecolor='yellow')\n",
- " ax.set_title('Cotton blue water footprint in India (dark blue: higher water footprint)')"
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
+ " ax.set_ylim((29.5, 30.5))\n",
+ " ax.set_xlim((75, 76))\n",
+ " rio.plot.show(dat, vmin=0, vmax=444, cmap=\"Blues\", ax=ax, transform=src.transform)\n",
+ " polygon_gdf.plot(ax=ax, alpha=0.5, edgecolor=\"yellow\")\n",
+ " ax.set_title(\"Cotton blue water footprint in India (dark blue: higher water footprint)\")"
]
},
{
@@ -626,39 +616,35 @@
"def generate_h3_df(geometry, res):\n",
" \"\"\"\n",
" Generate h3 for geometry\n",
- " \n",
+ "\n",
" Input\n",
" ------\n",
" geometry: shapely.polygon or shapely.multipolygon\n",
- " \n",
+ "\n",
" Output\n",
" ------\n",
" gdf with H3_hexes\n",
" \"\"\"\n",
" # Create an empty dataframe to write data into\n",
- " h3_df = pd.DataFrame([],columns=['h3_id'])\n",
- " if geometry.geom_type == 'MultiPolygon':\n",
+ " h3_df = pd.DataFrame([], columns=[\"h3_id\"])\n",
+ " if geometry.geom_type == \"MultiPolygon\":\n",
" district_polygon = list(geometry)\n",
" for polygon in district_polygon:\n",
" poly_geojson = gpd.GeoSeries([polygon]).__geo_interface__\n",
- " poly_geojson = poly_geojson['features'][0]['geometry'] \n",
+ " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n",
" h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n",
" for h3_hex in h3_hexes:\n",
- " h3_df.loc[len(h3_df)]=[\n",
- " h3_hex\n",
- " ]\n",
- " elif geometry.geom_type == 'Polygon':\n",
+ " h3_df.loc[len(h3_df)] = [h3_hex]\n",
+ " elif geometry.geom_type == \"Polygon\":\n",
" poly_geojson = gpd.GeoSeries(geometry).__geo_interface__\n",
- " poly_geojson = poly_geojson['features'][0]['geometry']\n",
+ " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n",
" h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n",
" for h3_hex in h3_hexes:\n",
- " h3_df.loc[len(h3_df)]=[\n",
- " h3_hex\n",
- " ]\n",
+ " h3_df.loc[len(h3_df)] = [h3_hex]\n",
" else:\n",
- " print('Shape is not a polygon or multypolygon.')\n",
- " \n",
- " return h3_df\n"
+ " print(\"Shape is not a polygon or multypolygon.\")\n",
+ "\n",
+ " return h3_df"
]
},
{
@@ -670,22 +656,22 @@
"def generate_h3_features(geometry, res):\n",
" \"\"\"\n",
" Generate h3 for geometry\n",
- " \n",
+ "\n",
" Input\n",
" ------\n",
" geometry: shapely.polygon or shapely.multipolygon\n",
- " \n",
+ "\n",
" Output\n",
" ------\n",
" gdf with H3_hexes\n",
" \"\"\"\n",
" # Create an empty dataframe to write data into\n",
- " h3_df = pd.DataFrame([],columns=['h3_id'])\n",
- " if geometry.geom_type == 'MultiPolygon':\n",
+ " pd.DataFrame([], columns=[\"h3_id\"])\n",
+ " if geometry.geom_type == \"MultiPolygon\":\n",
" district_polygon = list(geometry)\n",
" for polygon in district_polygon:\n",
" poly_geojson = gpd.GeoSeries([polygon]).__geo_interface__\n",
- " poly_geojson = poly_geojson['features'][0]['geometry'] \n",
+ " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n",
" h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n",
" for h3_hex in h3_hexes:\n",
" coords = h3.h3_set_to_multi_polygon([h3_hex], geo_json=True)\n",
@@ -694,9 +680,9 @@
" \"properties\": {\"hexid\": h3_hex},\n",
" \"geometry\": {\"type\": \"Polygon\", \"coordinates\": coords[0]},\n",
" }\n",
- " elif geometry.geom_type == 'Polygon':\n",
+ " elif geometry.geom_type == \"Polygon\":\n",
" poly_geojson = gpd.GeoSeries(geometry).__geo_interface__\n",
- " poly_geojson = poly_geojson['features'][0]['geometry']\n",
+ " poly_geojson = poly_geojson[\"features\"][0][\"geometry\"]\n",
" h3_hexes = h3.polyfill_geojson(poly_geojson, res)\n",
" for h3_hex in h3_hexes:\n",
" coords = h3.h3_set_to_multi_polygon([h3_hex], geo_json=True)\n",
@@ -706,10 +692,7 @@
" \"geometry\": {\"type\": \"Polygon\", \"coordinates\": coords[0]},\n",
" }\n",
" else:\n",
- " print('Shape is not a polygon or multypolygon.')\n",
- " \n",
- " \n",
- " "
+ " print(\"Shape is not a polygon or multypolygon.\")"
]
},
{
@@ -728,7 +711,7 @@
"source": [
"## time to process the entire malasya\n",
"start_time = time.time()\n",
- "h3_adm_df = generate_h3_df(user_data_polygon['geometry'][1], 8)\n",
+ "h3_adm_df = generate_h3_df(user_data_polygon[\"geometry\"][1], 8)\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -746,7 +729,7 @@
}
],
"source": [
- "## time to process the test geometry - \n",
+ "## time to process the test geometry -\n",
"start_time = time.time()\n",
"h3_adm_df = generate_h3_df(polygon, 8)\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
@@ -838,9 +821,9 @@
}
],
"source": [
- "#time spend in generating the features in h3 for Malasya\n",
+ "# time spend in generating the features in h3 for Malasya\n",
"start_time = time.time()\n",
- "h3_features = generate_h3_features(test_location['geometry'], 8)\n",
+ "h3_features = generate_h3_features(test_location[\"geometry\"], 8)\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -858,7 +841,7 @@
}
],
"source": [
- "#time spend in generating the features in h3 for the test polygon\n",
+ "# time spend in generating the features in h3 for the test polygon\n",
"start_time = time.time()\n",
"h3_features = generate_h3_features(polygon, 8)\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
@@ -878,9 +861,9 @@
}
],
"source": [
- "#time spend in generating the features in h3 for the basins test in resolution 1\n",
+ "# time spend in generating the features in h3 for the basins test in resolution 1\n",
"start_time = time.time()\n",
- "h3_features = [generate_h3_features(poly, 4) for poly in basins['geometry']]\n",
+ "h3_features = [generate_h3_features(poly, 4) for poly in basins[\"geometry\"]]\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -898,9 +881,9 @@
}
],
"source": [
- "#time spend in generating the features in h3 for the basins test in resolution 1\n",
+ "# time spend in generating the features in h3 for the basins test in resolution 1\n",
"start_time = time.time()\n",
- "h3_features_res5 = [generate_h3_features(poly, 5) for poly in basins['geometry']]\n",
+ "h3_features_res5 = [generate_h3_features(poly, 5) for poly in basins[\"geometry\"]]\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -918,9 +901,9 @@
}
],
"source": [
- "#time spend in generating the features in h3 for the basins test in resolution 1\n",
+ "# time spend in generating the features in h3 for the basins test in resolution 1\n",
"start_time = time.time()\n",
- "h3_features_res7 = [generate_h3_features(poly, 7) for poly in basins['geometry']]\n",
+ "h3_features_res7 = [generate_h3_features(poly, 7) for poly in basins[\"geometry\"]]\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -945,21 +928,18 @@
}
],
"source": [
- "\n",
- "#summary statistics world main basins\n",
+ "# summary statistics world main basins\n",
"start_time = time.time()\n",
- "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n",
+ "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n",
"# Outputs hexbin feautres with additional properties: \"population_sum\": \n",
- "summ_stats_h3 = [gen_zonal_stats(\n",
- " generator,\n",
- " raster_path,\n",
- " stats=\"max\",\n",
- " prefix=\"m_\",\n",
- " geojson_out=True,\n",
- " all_touched=True\n",
- " ) for generator in h3_features]\n",
- " \n",
- " \n",
+ "summ_stats_h3 = [\n",
+ " gen_zonal_stats(\n",
+ " generator, raster_path, stats=\"max\", prefix=\"m_\", geojson_out=True, all_touched=True\n",
+ " )\n",
+ " for generator in h3_features\n",
+ "]\n",
+ "\n",
+ "\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -977,21 +957,18 @@
}
],
"source": [
- "\n",
- "#summary statistics world main basins\n",
+ "# summary statistics world main basins\n",
"start_time = time.time()\n",
- "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n",
+ "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n",
"# Outputs hexbin feautres with additional properties: \"population_sum\": \n",
- "summ_stats_h3_res5 = [gen_zonal_stats(\n",
- " generator,\n",
- " raster_path,\n",
- " stats=\"max\",\n",
- " prefix=\"m_\",\n",
- " geojson_out=True,\n",
- " all_touched=True\n",
- " ) for generator in h3_features_res5]\n",
- " \n",
- " \n",
+ "summ_stats_h3_res5 = [\n",
+ " gen_zonal_stats(\n",
+ " generator, raster_path, stats=\"max\", prefix=\"m_\", geojson_out=True, all_touched=True\n",
+ " )\n",
+ " for generator in h3_features_res5\n",
+ "]\n",
+ "\n",
+ "\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -1009,21 +986,18 @@
}
],
"source": [
- "\n",
- "#summary statistics world main basins\n",
+ "# summary statistics world main basins\n",
"start_time = time.time()\n",
- "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n",
+ "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n",
"# Outputs hexbin feautres with additional properties: \"population_sum\": \n",
- "summ_stats_h3_res7 = [gen_zonal_stats(\n",
- " generator,\n",
- " raster_path,\n",
- " stats=\"max\",\n",
- " prefix=\"m_\",\n",
- " geojson_out=True,\n",
- " all_touched=True\n",
- " ) for generator in h3_features_res7]\n",
- " \n",
- " \n",
+ "summ_stats_h3_res7 = [\n",
+ " gen_zonal_stats(\n",
+ " generator, raster_path, stats=\"max\", prefix=\"m_\", geojson_out=True, all_touched=True\n",
+ " )\n",
+ " for generator in h3_features_res7\n",
+ "]\n",
+ "\n",
+ "\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -1041,10 +1015,10 @@
}
],
"source": [
- "#summary statistics in malasya\n",
+ "# summary statistics in malasya\n",
"start_time = time.time()\n",
- "hexbin_generator = generate_h3_features(user_data_polygon['geometry'][1], 8)\n",
- "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n",
+ "hexbin_generator = generate_h3_features(user_data_polygon[\"geometry\"][1], 8)\n",
+ "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n",
"# Outputs hexbin feautres with additional properties: \"population_sum\": \n",
"summ_stats_h3 = gen_zonal_stats(\n",
" hexbin_generator,\n",
@@ -1070,10 +1044,10 @@
}
],
"source": [
- "#summary statistics in test geometry - adm level 3\n",
+ "# summary statistics in test geometry - adm level 3\n",
"start_time = time.time()\n",
"hexbin_generator = generate_h3_features(polygon, 8)\n",
- "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n",
+ "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n",
"# Outputs hexbin feautres with additional properties: \"population_sum\": \n",
"summ_stats_h3 = gen_zonal_stats(\n",
" hexbin_generator,\n",
@@ -1081,7 +1055,7 @@
" stats=\"median std\",\n",
" prefix=\"wfbl_mmyr_cotton\",\n",
" geojson_out=True,\n",
- " all_touched=True\n",
+ " all_touched=True,\n",
")\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
@@ -1100,10 +1074,10 @@
}
],
"source": [
- "#summary statistics in test geometry - adm level 3\n",
+ "# summary statistics in test geometry - adm level 3\n",
"start_time = time.time()\n",
"hexbin_generator = generate_h3_features(polygon, 4)\n",
- "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n",
+ "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n",
"# Outputs hexbin feautres with additional properties: \"population_sum\": \n",
"summ_stats_h3_res4 = gen_zonal_stats(\n",
" hexbin_generator,\n",
@@ -1129,10 +1103,10 @@
}
],
"source": [
- "#summary statistics in test geometry - adm level 3\n",
+ "# summary statistics in test geometry - adm level 3\n",
"start_time = time.time()\n",
"hexbin_generator = generate_h3_features(polygon, 6)\n",
- "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n",
+ "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n",
"# Outputs hexbin feautres with additional properties: \"population_sum\": \n",
"summ_stats_h3_res6 = gen_zonal_stats(\n",
" hexbin_generator,\n",
@@ -1158,10 +1132,10 @@
}
],
"source": [
- "#summary statistics in test geometry - adm level 3\n",
+ "# summary statistics in test geometry - adm level 3\n",
"start_time = time.time()\n",
"hexbin_generator = generate_h3_features(polygon, 5)\n",
- "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n",
+ "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n",
"# Outputs hexbin feautres with additional properties: \"population_sum\": \n",
"summ_stats_h3_res5 = gen_zonal_stats(\n",
" hexbin_generator,\n",
@@ -1187,14 +1161,11 @@
}
],
"source": [
- "#summary statistics in test geometry - adm level 3\n",
+ "# summary statistics in test geometry - adm level 3\n",
"start_time = time.time()\n",
"hexbin_generator = generate_h3_features(polygon, 8)\n",
- "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n",
- "zs_h3= zonal_stats(\n",
- " hexbin_generator,\n",
- " raster_path,\n",
- " stats=\"median\")\n",
+ "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n",
+ "zs_h3 = zonal_stats(hexbin_generator, raster_path, stats=\"median\")\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -1212,17 +1183,17 @@
}
],
"source": [
- "#summary statistics in test geometry - adm level 3\n",
+ "# summary statistics in test geometry - adm level 3\n",
"start_time = time.time()\n",
"hexbin_generator = generate_h3_features(polygon, 6)\n",
- "raster_path = '../../datasets/raw/wfbl_mmyr_4326_cotton.tif'\n",
+ "raster_path = \"../../datasets/raw/wfbl_mmyr_4326_cotton.tif\"\n",
"gpq_stats_h3_res6 = gen_point_query(\n",
" hexbin_generator,\n",
" raster_path,\n",
- " interpolate = 'nearest',\n",
- " property_name = 'bl_gpq_',\n",
+ " interpolate=\"nearest\",\n",
+ " property_name=\"bl_gpq_\",\n",
" geojson_out=True,\n",
- " )\n",
+ ")\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -1251,7 +1222,7 @@
"# check features of basins worldwide\n",
"for generator in summ_stats_h3:\n",
" for feature in generator:\n",
- " print(feature['properties']['m_max'])\n",
+ " print(feature[\"properties\"][\"m_max\"])\n",
" break"
]
},
@@ -1345,7 +1316,6 @@
}
],
"source": [
- "\n",
"for feature in gpq_stats_h3_res6:\n",
" print(feature)\n",
" break"
@@ -1365,15 +1335,15 @@
}
],
"source": [
- "#generate a dataframe with the elements\n",
+ "# generate a dataframe with the elements\n",
"start_time = time.time()\n",
- "h3_gdf_wfbl = pd.DataFrame([],columns=['h3_id', 'wfbl_mmyr_cotton_median', 'geometry'])\n",
+ "h3_gdf_wfbl = pd.DataFrame([], columns=[\"h3_id\", \"wfbl_mmyr_cotton_median\", \"geometry\"])\n",
"for feature in summ_stats_h3:\n",
- " h3_gdf_wfbl.loc[len(h3_gdf_wfbl)]=[\n",
- " feature['properties']['hexid'],\n",
- " feature['properties']['wfbl_mmyr_cottonmedian'],\n",
- " shape(feature['geometry'])\n",
- " ]\n",
+ " h3_gdf_wfbl.loc[len(h3_gdf_wfbl)] = [\n",
+ " feature[\"properties\"][\"hexid\"],\n",
+ " feature[\"properties\"][\"wfbl_mmyr_cottonmedian\"],\n",
+ " shape(feature[\"geometry\"]),\n",
+ " ]\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -1391,15 +1361,15 @@
}
],
"source": [
- "#generate a dataframe with the elements\n",
+ "# generate a dataframe with the elements\n",
"start_time = time.time()\n",
- "h3_gdf_wfbl_res4 = pd.DataFrame([],columns=['h3_id', 'wfbl_mmyr_cotton_median', 'geometry'])\n",
+ "h3_gdf_wfbl_res4 = pd.DataFrame([], columns=[\"h3_id\", \"wfbl_mmyr_cotton_median\", \"geometry\"])\n",
"for feature in summ_stats_h3_res4:\n",
- " h3_gdf_wfbl_res4.loc[len(h3_gdf_wfbl_res4)]=[\n",
- " feature['properties']['hexid'],\n",
- " feature['properties']['wfbl_mmyr_cottonmedian'],\n",
- " shape(feature['geometry'])\n",
- " ]\n",
+ " h3_gdf_wfbl_res4.loc[len(h3_gdf_wfbl_res4)] = [\n",
+ " feature[\"properties\"][\"hexid\"],\n",
+ " feature[\"properties\"][\"wfbl_mmyr_cottonmedian\"],\n",
+ " shape(feature[\"geometry\"]),\n",
+ " ]\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -1417,15 +1387,15 @@
}
],
"source": [
- "#generate a dataframe with the elements\n",
+ "# generate a dataframe with the elements\n",
"start_time = time.time()\n",
- "h3_gdf_wfbl_res6 = pd.DataFrame([],columns=['h3_id', 'wfbl_mmyr_cotton_median', 'geometry'])\n",
+ "h3_gdf_wfbl_res6 = pd.DataFrame([], columns=[\"h3_id\", \"wfbl_mmyr_cotton_median\", \"geometry\"])\n",
"for feature in summ_stats_h3_res6:\n",
- " h3_gdf_wfbl_res6.loc[len(h3_gdf_wfbl_res6)]=[\n",
- " feature['properties']['hexid'],\n",
- " feature['properties']['wfbl_mmyr_cottonmedian'],\n",
- " shape(feature['geometry'])\n",
- " ]\n",
+ " h3_gdf_wfbl_res6.loc[len(h3_gdf_wfbl_res6)] = [\n",
+ " feature[\"properties\"][\"hexid\"],\n",
+ " feature[\"properties\"][\"wfbl_mmyr_cottonmedian\"],\n",
+ " shape(feature[\"geometry\"]),\n",
+ " ]\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -1443,15 +1413,15 @@
}
],
"source": [
- "#generate a dataframe with the elements\n",
+ "# generate a dataframe with the elements\n",
"start_time = time.time()\n",
- "h3_gdf_wfbl_res5 = pd.DataFrame([],columns=['h3_id', 'wfbl_mmyr_cotton_median', 'geometry'])\n",
+ "h3_gdf_wfbl_res5 = pd.DataFrame([], columns=[\"h3_id\", \"wfbl_mmyr_cotton_median\", \"geometry\"])\n",
"for feature in summ_stats_h3_res5:\n",
- " h3_gdf_wfbl_res5.loc[len(h3_gdf_wfbl_res5)]=[\n",
- " feature['properties']['hexid'],\n",
- " feature['properties']['wfbl_mmyr_cottonmedian'],\n",
- " shape(feature['geometry'])\n",
- " ]\n",
+ " h3_gdf_wfbl_res5.loc[len(h3_gdf_wfbl_res5)] = [\n",
+ " feature[\"properties\"][\"hexid\"],\n",
+ " feature[\"properties\"][\"wfbl_mmyr_cottonmedian\"],\n",
+ " shape(feature[\"geometry\"]),\n",
+ " ]\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -1476,12 +1446,12 @@
"for generator in summ_stats_h3:\n",
" for feature in generator:\n",
" element = {\n",
- " 'max':feature['properties']['m_max'],\n",
- " 'hexId':feature['properties']['hexid'], \n",
+ " \"max\": feature[\"properties\"][\"m_max\"],\n",
+ " \"hexId\": feature[\"properties\"][\"hexid\"],\n",
" }\n",
" array_res1.append(element)\n",
- " \n",
- "print(\"--- %s seconds ---\" % (time.time() - start_time)) "
+ "\n",
+ "print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
{
@@ -1505,12 +1475,12 @@
"for generator in summ_stats_h3_res5:\n",
" for feature in generator:\n",
" element = {\n",
- " 'max':feature['properties']['m_max'],\n",
- " 'hexId':feature['properties']['hexid'], \n",
+ " \"max\": feature[\"properties\"][\"m_max\"],\n",
+ " \"hexId\": feature[\"properties\"][\"hexid\"],\n",
" }\n",
" array_res5.append(element)\n",
- " \n",
- "print(\"--- %s seconds ---\" % (time.time() - start_time))\n"
+ "\n",
+ "print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
{
@@ -1537,20 +1507,19 @@
}
],
"source": [
- "\n",
"## generate json for res1\n",
"# check features of basins worldwide\n",
"start_time = time.time()\n",
"array_res7 = []\n",
"for generator in summ_stats_h3_res7:\n",
" for feature in generator:\n",
- " if feature['properties']['m_max'] !=0:\n",
+ " if feature[\"properties\"][\"m_max\"] != 0:\n",
" element = {\n",
- " 'max':feature['properties']['m_max'],\n",
- " 'hexId':feature['properties']['hexid'], \n",
+ " \"max\": feature[\"properties\"][\"m_max\"],\n",
+ " \"hexId\": feature[\"properties\"][\"hexid\"],\n",
" }\n",
" array_res7.append(element)\n",
- " \n",
+ "\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -1569,7 +1538,7 @@
"metadata": {},
"outputs": [],
"source": [
- "with open('./array_res4_v3.json', 'w') as f:\n",
+ "with open(\"./array_res4_v3.json\", \"w\") as f:\n",
" json.dump(array_res1, f)"
]
},
@@ -2599,7 +2568,7 @@
"metadata": {},
"outputs": [],
"source": [
- "with open('./data_res5_v2.json', 'r') as f:\n",
+ "with open(\"./data_res5_v2.json\", \"r\") as f:\n",
" res_5 = json.load(f)"
]
},
@@ -3639,7 +3608,7 @@
"metadata": {},
"outputs": [],
"source": [
- "with open('./data_res4_4.json', 'r') as f:\n",
+ "with open(\"./data_res4_4.json\", \"r\") as f:\n",
" res_4 = json.load(f)"
]
},
@@ -3650,8 +3619,8 @@
"outputs": [],
"source": [
"for row in res_4:\n",
- " #print(row['hexId'])\n",
- " row['hexId'] = list(h3.h3_to_children(row['hexId'], 5))"
+ " # print(row['hexId'])\n",
+ " row[\"hexId\"] = list(h3.h3_to_children(row[\"hexId\"], 5))"
]
},
{
@@ -3660,7 +3629,7 @@
"metadata": {},
"outputs": [],
"source": [
- "res_4_5 = res_4.insert(0,res_5)"
+ "res_4_5 = res_4.insert(0, res_5)"
]
},
{
@@ -3669,8 +3638,8 @@
"metadata": {},
"outputs": [],
"source": [
- "with open('./data_res4_5.json', 'w') as f:\n",
- " json.dump(res_4,f)"
+ "with open(\"./data_res4_5.json\", \"w\") as f:\n",
+ " json.dump(res_4, f)"
]
},
{
@@ -3688,9 +3657,9 @@
"metadata": {},
"outputs": [],
"source": [
- "#export \n",
- "with open('./data_res4_5_joined.json', 'w') as f:\n",
- " json.dump(res_join,f)"
+ "# export\n",
+ "with open(\"./data_res4_5_joined.json\", \"w\") as f:\n",
+ " json.dump(res_join, f)"
]
},
{
@@ -11719,8 +11688,7 @@
"metadata": {},
"outputs": [],
"source": [
- "res_join_noNull = [el for el in res_join if el['max'] != 0]\n",
- " "
+ "res_join_noNull = [el for el in res_join if el[\"max\"] != 0]"
]
},
{
@@ -11729,9 +11697,9 @@
"metadata": {},
"outputs": [],
"source": [
- "#export \n",
- "with open('./data_res4_5_joined_noNull.json', 'w') as f:\n",
- " json.dump(res_join_noNull,f)\n"
+ "# export\n",
+ "with open(\"./data_res4_5_joined_noNull.json\", \"w\") as f:\n",
+ " json.dump(res_join_noNull, f)"
]
},
{
@@ -11740,7 +11708,7 @@
"metadata": {},
"outputs": [],
"source": [
- "h3_gdf_wfbl.to_csv('../../datasets/processed/h3_summary_stats_test_india_res8_att.csv')"
+ "h3_gdf_wfbl.to_csv(\"../../datasets/processed/h3_summary_stats_test_india_res8_att.csv\")"
]
},
{
@@ -11749,7 +11717,7 @@
"metadata": {},
"outputs": [],
"source": [
- "h3_gdf_wfbl_res4.to_csv('../../datasets/processed/h3_summary_stats_test_india_res4.csv')"
+ "h3_gdf_wfbl_res4.to_csv(\"../../datasets/processed/h3_summary_stats_test_india_res4.csv\")"
]
},
{
@@ -11758,7 +11726,7 @@
"metadata": {},
"outputs": [],
"source": [
- "h3_gdf_wfbl_res6.to_csv('../../datasets/processed/h3_summary_stats_test_india_res6.csv')"
+ "h3_gdf_wfbl_res6.to_csv(\"../../datasets/processed/h3_summary_stats_test_india_res6.csv\")"
]
},
{
@@ -11767,7 +11735,7 @@
"metadata": {},
"outputs": [],
"source": [
- "h3_gdf_wfbl_res5.to_csv('../../datasets/processed/h3_summary_stats_test_india_res5.csv')"
+ "h3_gdf_wfbl_res5.to_csv(\"../../datasets/processed/h3_summary_stats_test_india_res5.csv\")"
]
},
{
@@ -11810,7 +11778,7 @@
"# percentage of cells with value zero at varius index resolutions\n",
"\n",
"msg_ = \"Percentage of cells with value zero at resolution {}: {} %\"\n",
- "perc_hexes_zeros = 100 * (len(h3_gdf_wfbl)- len(h3_gdf_wfbl_noNan)) / len(h3_gdf_wfbl)\n",
+ "perc_hexes_zeros = 100 * (len(h3_gdf_wfbl) - len(h3_gdf_wfbl_noNan)) / len(h3_gdf_wfbl)\n",
"print(msg_.format(8, round(perc_hexes_zeros, 2)))"
]
},
@@ -11820,8 +11788,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#we can interpolate for visualization options\n",
- "df_test = h3_gdf_wfbl.interpolate(method='nearest')"
+ "# we can interpolate for visualization options\n",
+ "df_test = h3_gdf_wfbl.interpolate(method=\"nearest\")"
]
},
{
@@ -11972,7 +11940,7 @@
"metadata": {},
"outputs": [],
"source": [
- "df_test.to_csv('../../datasets/processed/h3_summary_stats_test_india_interpolated.csv')"
+ "df_test.to_csv(\"../../datasets/processed/h3_summary_stats_test_india_interpolated.csv\")"
]
},
{
@@ -12088,7 +12056,7 @@
"metadata": {},
"outputs": [],
"source": [
- "h3_gdf_wfbl.insert(3, 'nearest_geometry', None)"
+ "h3_gdf_wfbl.insert(3, \"nearest_geometry\", None)"
]
},
{
@@ -12097,8 +12065,8 @@
"metadata": {},
"outputs": [],
"source": [
- "points = [row.geometry. centroid for index, row in h3_gdf_wfbl.iterrows() ]\n",
- "h3_gdf_wfbl['points']=points"
+ "points = [row.geometry.centroid for index, row in h3_gdf_wfbl.iterrows()]\n",
+ "h3_gdf_wfbl[\"points\"] = points"
]
},
{
@@ -12318,7 +12286,7 @@
}
],
"source": [
- "#drop nan values\n",
+ "# drop nan values\n",
"h3_gdf_wfbl_noNan = h3_gdf_wfbl.dropna()\n",
"h3_gdf_wfbl_noNan.head()"
]
@@ -12329,13 +12297,12 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"for index, row in h3_gdf_wfbl.iterrows():\n",
" point = row.points\n",
- " #multipoint = h3_gdf_wfbl.drop(index, axis=0).points.unary_union\n",
+ " # multipoint = h3_gdf_wfbl.drop(index, axis=0).points.unary_union\n",
" multipoint = h3_gdf_wfbl_noNan.points.unary_union\n",
" queried_geom, nearest_geom = nearest_points(point, multipoint)\n",
- " h3_gdf_wfbl.loc[index, 'nearest_geometry'] = nearest_geom"
+ " h3_gdf_wfbl.loc[index, \"nearest_geometry\"] = nearest_geom"
]
},
{
@@ -12455,9 +12422,10 @@
"outputs": [],
"source": [
"for index, row in h3_gdf_wfbl.iterrows():\n",
- " nearest_value = h3_gdf_wfbl_noNan[h3_gdf_wfbl_noNan['points']== h3_gdf_wfbl.iloc[index]['nearest_geometry']].iloc[0]['wfbl_mmyr_cotton_median']\n",
- " h3_gdf_wfbl.loc[index, 'wfbl_mmyr_cotton_median'] = nearest_value\n",
- " "
+ " nearest_value = h3_gdf_wfbl_noNan[\n",
+ " h3_gdf_wfbl_noNan[\"points\"] == h3_gdf_wfbl.iloc[index][\"nearest_geometry\"]\n",
+ " ].iloc[0][\"wfbl_mmyr_cotton_median\"]\n",
+ " h3_gdf_wfbl.loc[index, \"wfbl_mmyr_cotton_median\"] = nearest_value"
]
},
{
@@ -12645,9 +12613,9 @@
"metadata": {},
"outputs": [],
"source": [
- "#set the hexagon as geometry and export\n",
- "h3_gdf_wfbl = h3_gdf_wfbl.set_geometry('geometry')[['h3_id', 'wfbl_mmyr_cotton_median', 'geometry']]\n",
- "h3_gdf_wfbl.to_csv('../../datasets/processed/h3_summary_stats_test_india_interpolated_nearest.csv')"
+ "# set the hexagon as geometry and export\n",
+ "h3_gdf_wfbl = h3_gdf_wfbl.set_geometry(\"geometry\")[[\"h3_id\", \"wfbl_mmyr_cotton_median\", \"geometry\"]]\n",
+ "h3_gdf_wfbl.to_csv(\"../../datasets/processed/h3_summary_stats_test_india_interpolated_nearest.csv\")"
]
},
{
diff --git a/data/notebooks/Lab/0_5_Area_distribution.ipynb b/data/notebooks/Lab/0_5_Area_distribution.ipynb
index 51faea0dc..fd835c6c9 100644
--- a/data/notebooks/Lab/0_5_Area_distribution.ipynb
+++ b/data/notebooks/Lab/0_5_Area_distribution.ipynb
@@ -56,13 +56,13 @@
"outputs": [],
"source": [
"## import libraries\n",
- "import geopandas as gpd\n",
"import time\n",
"\n",
- "from rasterstats import zonal_stats\n",
+ "import geopandas as gpd\n",
+ "import matplotlib.pyplot as plt\n",
"import rasterio as rio\n",
"import rasterio.plot\n",
- "import matplotlib.pyplot as plt"
+ "from rasterstats import zonal_stats"
]
},
{
@@ -208,7 +208,7 @@
"source": [
"## import user data\n",
"## projection of user data is epsg:4326\n",
- "input_data = gpd.read_file('../../datasets/processed/user_data/located_lg_data_polygon_v2.shp') \n",
+ "input_data = gpd.read_file(\"../../datasets/processed/user_data/located_lg_data_polygon_v2.shp\")\n",
"input_data.head()"
]
},
@@ -229,7 +229,7 @@
"source": [
"## check the commodities materials - we will need to generate a distribution map for each of the commodities.\n",
"\n",
- "print(f'The uniques commodities from the user data are:', set(input_data['Material']))"
+ "print(\"The uniques commodities from the user data are:\", set(input_data[\"Material\"]))"
]
},
{
@@ -311,7 +311,11 @@
}
],
"source": [
- "test_location = input_data.loc[(input_data['Material']=='Cotton') & (input_data['Country']=='India') & ((input_data['Volume']==745) )]\n",
+ "test_location = input_data.loc[\n",
+ " (input_data[\"Material\"] == \"Cotton\")\n",
+ " & (input_data[\"Country\"] == \"India\")\n",
+ " & ((input_data[\"Volume\"] == 745))\n",
+ "]\n",
"test_location"
]
},
@@ -331,7 +335,7 @@
],
"source": [
"test_location = test_location.set_crs(\"EPSG:4326\")\n",
- "print(f'projection of user data is: {test_location.crs}')"
+ "print(f\"projection of user data is: {test_location.crs}\")"
]
},
{
@@ -341,7 +345,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#reproject to epsg3857\n",
+ "# reproject to epsg3857\n",
"test_location = test_location.to_crs(\"EPSG:3857\")"
]
},
@@ -352,7 +356,9 @@
"metadata": {},
"outputs": [],
"source": [
- "test_location.to_file('../../datasets/raw/probability_map/test_location_epsg3857.shp', driver='ESRI Shapefile')"
+ "test_location.to_file(\n",
+ " \"../../datasets/raw/probability_map/test_location_epsg3857.shp\", driver=\"ESRI Shapefile\"\n",
+ ")"
]
},
{
@@ -394,8 +400,8 @@
"metadata": {},
"outputs": [],
"source": [
- "yield_cotton = '../../datasets/raw/crop_data/cotton_YieldPerHectare.tif'\n",
- "harvest_portion_cotton = '../../datasets/raw/crop_data/cotton_HarvestedAreaFraction.tif'\n"
+ "yield_cotton = \"../../datasets/raw/crop_data/cotton_YieldPerHectare.tif\"\n",
+ "harvest_portion_cotton = \"../../datasets/raw/crop_data/cotton_HarvestedAreaFraction.tif\""
]
},
{
@@ -568,7 +574,7 @@
}
],
"source": [
- "# reproject yield \n",
+ "# reproject yield\n",
"!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 12051.131160772875 12051.131160772875 -r near -of GTiff ../../datasets/raw/crop_data/cotton_YieldPerHectare.tif ../../datasets/raw/crop_data/cotton_YieldPerHectare_epsg3857.tif"
]
},
@@ -657,8 +663,8 @@
}
],
"source": [
- "#check infor of the reprojected raster\n",
- "!gdalinfo -stats -hist '../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857.tif'\n"
+ "# check infor of the reprojected raster\n",
+ "!gdalinfo -stats -hist '../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857.tif'"
]
},
{
@@ -808,8 +814,7 @@
"source": [
"## calculate pixel area based on pixel size\n",
"pixel_area = 12051.131160772874864 * 12051.131160772874864\n",
- "print(f'The pixel area is {pixel_area} m2')\n",
- "\n"
+ "print(f\"The pixel area is {pixel_area} m2\")"
]
},
{
@@ -906,7 +911,7 @@
}
],
"source": [
- "#generate raster with pixel area raster\n",
+ "# generate raster with pixel area raster\n",
"# reclasifies the raster into 0 and pixel area being the pixel area just on thise locations with harvest area fraction\n",
"!gdal_calc.py -A '../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857.tif' --outfile='../../datasets/raw/probability_map/pixel_area_cotton_raster_epsg3857.tif' --calc=\"(A > 0) * 145229762.254151\""
]
@@ -1035,13 +1040,12 @@
}
],
"source": [
- "#zonal stats in india to get the sum of all fraction harvest area\n",
- "total_harves_area_cotton = '../../datasets/raw/probability_map/area_total_cotton_raster_epsg3857.tif'\n",
+ "# zonal stats in india to get the sum of all fraction harvest area\n",
+ "total_harves_area_cotton = (\n",
+ " \"../../datasets/raw/probability_map/area_total_cotton_raster_epsg3857.tif\"\n",
+ ")\n",
"start_time = time.time()\n",
- "zs_india_test = zonal_stats(\n",
- " test_location,\n",
- " total_harves_area_cotton,\n",
- " stats=\"sum\")\n",
+ "zs_india_test = zonal_stats(test_location, total_harves_area_cotton, stats=\"sum\")\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -1060,7 +1064,7 @@
}
],
"source": [
- "print(f' The total cotton harvest area in india is :', {zs_india_test[0]['sum']}, 'm2')"
+ "print(\" The total cotton harvest area in india is :\", {zs_india_test[0][\"sum\"]}, \"m2\")"
]
},
{
@@ -1137,7 +1141,7 @@
],
"source": [
"## ad field to gdf\n",
- "test_location['Total_af'] = zs_india_test[0]['sum']\n",
+ "test_location[\"Total_af\"] = zs_india_test[0][\"sum\"]\n",
"test_location"
]
},
@@ -1148,7 +1152,9 @@
"metadata": {},
"outputs": [],
"source": [
- "test_location.to_file('../../datasets/raw/probability_map/test_location_epsg3857.shp', driver='ESRI Shapefile')"
+ "test_location.to_file(\n",
+ " \"../../datasets/raw/probability_map/test_location_epsg3857.shp\", driver=\"ESRI Shapefile\"\n",
+ ")"
]
},
{
@@ -1167,7 +1173,7 @@
],
"source": [
"## generate a raster with same extent as the other ones with this total area fraction value\n",
- "!gdal_rasterize -l test_location_epsg3857 -a Total_af -tr 12051.131160772875 12051.131160772875 -a_nodata 0.0 -te -20037508.3428 -242486969.8524 20032502.7668 191642979.0833 -ot Float32 -of GTiff '../../datasets/raw/probability_map/test_location_epsg3857.shp' '../../datasets/raw/probability_map/test_location_raster_total_af.tif'\n"
+ "!gdal_rasterize -l test_location_epsg3857 -a Total_af -tr 12051.131160772875 12051.131160772875 -a_nodata 0.0 -te -20037508.3428 -242486969.8524 20032502.7668 191642979.0833 -ot Float32 -of GTiff '../../datasets/raw/probability_map/test_location_epsg3857.shp' '../../datasets/raw/probability_map/test_location_raster_total_af.tif'"
]
},
{
@@ -1406,13 +1412,22 @@
}
],
"source": [
- "with rio.open('../../datasets/processed/probability_map/purchase_area_distribution_cotton_epsg3857.tif') as src:\n",
+ "with rio.open(\n",
+ " \"../../datasets/processed/probability_map/purchase_area_distribution_cotton_epsg3857.tif\"\n",
+ ") as src:\n",
" image_array = src.read(1)\n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
- " ax.set_ylim((695174.093781,4.255931e+06))\n",
- " ax.set_xlim((7.582124e+06,1.084202e+07))\n",
- " rio.plot.show(image_array, vmin=2.1023777208029e-06, vmax=1.0740570812899e-05, cmap='Oranges', ax=ax, transform=src.transform)\n",
- " ax.set_title('Geospatial responsibility - test location')"
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
+ " ax.set_ylim((695174.093781, 4.255931e06))\n",
+ " ax.set_xlim((7.582124e06, 1.084202e07))\n",
+ " rio.plot.show(\n",
+ " image_array,\n",
+ " vmin=2.1023777208029e-06,\n",
+ " vmax=1.0740570812899e-05,\n",
+ " cmap=\"Oranges\",\n",
+ " ax=ax,\n",
+ " transform=src.transform,\n",
+ " )\n",
+ " ax.set_title(\"Geospatial responsibility - test location\")"
]
},
{
diff --git a/data/notebooks/Lab/0_6_test_h3ronpy_fg.ipynb b/data/notebooks/Lab/0_6_test_h3ronpy_fg.ipynb
index 4471d8a17..8ff425f92 100644
--- a/data/notebooks/Lab/0_6_test_h3ronpy_fg.ipynb
+++ b/data/notebooks/Lab/0_6_test_h3ronpy_fg.ipynb
@@ -13,20 +13,16 @@
"cell_type": "code",
"execution_count": 30,
"source": [
+ "import os\n",
+ "\n",
+ "import geopandas as gpd\n",
"import h3\n",
- "from h3ronpy import raster\n",
+ "import pandas as pd\n",
"import rasterio as rio\n",
"import rasterio.plot\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from rasterstats import gen_zonal_stats, gen_point_query\n",
- "from rasterstats import zonal_stats\n",
- "import pandas as pd\n",
- "import geopandas as gpd\n",
- "import h3\n",
- "import json\n",
- "import os\n",
- "from shapely.geometry import shape, mapping, box, Point, LinearRing, Polygon\n"
+ "from h3ronpy import raster\n",
+ "from rasterstats import gen_point_query\n",
+ "from shapely.geometry import Point, Polygon, box, mapping"
],
"outputs": [],
"metadata": {}
@@ -35,9 +31,11 @@
"cell_type": "code",
"execution_count": 4,
"source": [
- "test_raster = '../../data/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m.tif'\n",
+ "test_raster = (\n",
+ " \"../../data/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m.tif\"\n",
+ ")\n",
"\n",
- "test_area = (-10,40,0,50)"
+ "test_area = (-10, 40, 0, 50)"
],
"outputs": [],
"metadata": {}
@@ -51,10 +49,16 @@
" transform = rio.windows.transform(window, src.transform)\n",
" print(src.profile)\n",
" rio.plot.show(src.read(window=window, masked=True))\n",
- " gdf = raster.raster_to_geodataframe(src.read(1, window=window), transform, h3_resolution=4, nodata_value=src.profile['nodata'], compacted=False)\n",
+ " gdf = raster.raster_to_geodataframe(\n",
+ " src.read(1, window=window),\n",
+ " transform,\n",
+ " h3_resolution=4,\n",
+ " nodata_value=src.profile[\"nodata\"],\n",
+ " compacted=False,\n",
+ " )\n",
"\n",
- "gdf.plot('value')\n",
- "#gdf['h3index'] = gdf['h3index'].apply(hex)\n",
+ "gdf.plot(\"value\")\n",
+ "# gdf['h3index'] = gdf['h3index'].apply(hex)\n",
"gdf.head()"
],
"outputs": [
@@ -184,9 +188,11 @@
"idx_int = [int(h, 16) for h in idx]\n",
"geoms = h3.h3_set_to_multi_polygon(idx, geo_json=True)\n",
"\n",
- "df = pd.DataFrame({'h3index':idx_int, 'value':gen_point_query(pts, test_raster, interpolate='nearest')})\n",
+ "df = pd.DataFrame(\n",
+ " {\"h3index\": idx_int, \"value\": gen_point_query(pts, test_raster, interpolate=\"nearest\")}\n",
+ ")\n",
"df = df.dropna()\n",
- "df.plot('value')\n",
+ "df.plot(\"value\")\n",
"df.head()"
],
"outputs": [
@@ -277,7 +283,7 @@
"cell_type": "code",
"execution_count": 7,
"source": [
- "j = gdf.set_index('h3index').join(df.set_index('h3index'), rsuffix='t')\n",
+ "j = gdf.set_index(\"h3index\").join(df.set_index(\"h3index\"), rsuffix=\"t\")\n",
"j"
],
"outputs": [
@@ -443,7 +449,9 @@
"execution_count": 135,
"source": [
"%%timeit\n",
- "pd.DataFrame({'h3index':idx_int, 'value':gen_point_query(pts, test_raster, interpolate='nearest')})"
+ "pd.DataFrame(\n",
+ " {\"h3index\": idx_int, \"value\": gen_point_query(pts, test_raster, interpolate=\"nearest\")}\n",
+ ")"
],
"outputs": [
{
@@ -462,7 +470,13 @@
"source": [
"%%timeit\n",
"with rio.open(test_raster) as src:\n",
- " raster.to_dataframe(src.read(1, window=window), transform, h3_resolution=4, nodata_value=src.profile['nodata'], compacted=False)"
+ " raster.to_dataframe(\n",
+ " src.read(1, window=window),\n",
+ " transform,\n",
+ " h3_resolution=4,\n",
+ " nodata_value=src.profile[\"nodata\"],\n",
+ " compacted=False,\n",
+ " )"
],
"outputs": [
{
@@ -484,10 +498,11 @@
" for ji, window in src.block_windows():\n",
" transform = rio.windows.transform(window, src.transform)\n",
" arr = src.read(1, window=window)\n",
- " \n",
- " df = h3ronpy.raster.raster_to_geodataframe(arr, transform, 4, nodata_value=src.profile['nodata'], compacted=False)\n",
- " dfs.append(df)\n",
- "\n"
+ "\n",
+ " df = h3ronpy.raster.raster_to_geodataframe(\n",
+ " arr, transform, 4, nodata_value=src.profile[\"nodata\"], compacted=False\n",
+ " )\n",
+ " dfs.append(df)"
],
"outputs": [],
"metadata": {}
@@ -496,7 +511,7 @@
"cell_type": "code",
"execution_count": 147,
"source": [
- "l = [i for df in dfs for i in df['h3index']]\n",
+ "l = [i for df in dfs for i in df[\"h3index\"]]\n",
"print(len(l))\n",
"print(len(set(l)))"
],
@@ -516,7 +531,7 @@
"cell_type": "code",
"execution_count": 150,
"source": [
- "pd.concat(dfs).plot('value')"
+ "pd.concat(dfs).plot(\"value\")"
],
"outputs": [
{
@@ -549,12 +564,15 @@
"execution_count": 23,
"source": [
"from math import ceil\n",
+ "\n",
"BLOCKSIZE = 512\n",
+ "\n",
+ "\n",
"def gen_raster_h3(raster_list, h3_res):\n",
" \"\"\"Convert a list of identically formatted rasters to H3\n",
- " \n",
+ "\n",
" A function for efficiently turning a set of rasters into an H3 table.\n",
- " \n",
+ "\n",
" Takes a list of 1-band rasters with identical projection/transform.\n",
" Reads each raster in blocks, and converts to h3 (nearest to centroid).\n",
" Yields a dataframe with an h3index and one column for each raster's value.\n",
@@ -562,35 +580,36 @@
" Args:\n",
" raster_list: list of paths to rasters\n",
" h3_res: h3 resolution to use for resampling\n",
- " \n",
+ "\n",
" Yields:\n",
- " A Pandas dataframe for each raster block (usu. 512x512) with an \n",
+ " A Pandas dataframe for each raster block (usu. 512x512) with an\n",
" h3index and one column for each raster's value.\n",
" \"\"\"\n",
" readers = [rio.open(r) for r in raster_list]\n",
" names = [os.path.splitext(os.path.basename(r))[0].lower() for r in raster_list]\n",
- " \n",
+ "\n",
" base = readers[0]\n",
- " for j in range(ceil(base.height/BLOCKSIZE)):\n",
- " for i in range(ceil(base.width/BLOCKSIZE)):\n",
- " window = rio.windows.Window(i*BLOCKSIZE, j*BLOCKSIZE, BLOCKSIZE, BLOCKSIZE)\n",
+ " for j in range(ceil(base.height / BLOCKSIZE)):\n",
+ " for i in range(ceil(base.width / BLOCKSIZE)):\n",
+ " window = rio.windows.Window(i * BLOCKSIZE, j * BLOCKSIZE, BLOCKSIZE, BLOCKSIZE)\n",
" w_transform = rio.windows.transform(window, base.transform)\n",
" dfs = []\n",
" for src in readers:\n",
" if src.transform != base.transform:\n",
" raise ValueError(\"Transforms do not match\")\n",
" arr = src.read(1, window=window)\n",
- " _df = raster.raster_to_dataframe(arr, w_transform, h3_res, nodata_value=src.profile['nodata'], compacted=False)\n",
- " dfs.append(_df.set_index('h3index')['value'])\n",
+ " _df = raster.raster_to_dataframe(\n",
+ " arr, w_transform, h3_res, nodata_value=src.profile[\"nodata\"], compacted=False\n",
+ " )\n",
+ " dfs.append(_df.set_index(\"h3index\")[\"value\"])\n",
" df = pd.concat(dfs, axis=1)\n",
- " print(f'Reading block {j}, {i}: h3index count {len(df)}')\n",
+ " print(f\"Reading block {j}, {i}: h3index count {len(df)}\")\n",
" if len(df):\n",
" df.columns = names\n",
" # cast h3index from int64 to hex string\n",
" yield df\n",
" for src in readers:\n",
- " src.close()\n",
- "\n"
+ " src.close()"
],
"outputs": [],
"metadata": {}
@@ -600,14 +619,16 @@
"execution_count": 32,
"source": [
"test_list = [\n",
- " '../../data/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m.tif',\n",
- " '../../data/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Cropland2000_5m.tif'\n",
+ " \"../../data/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Pasture2000_5m.tif\",\n",
+ " \"../../data/raw/crop_data/default_pasture/CroplandPastureArea2000_Geotiff/Cropland2000_5m.tif\",\n",
"]\n",
- "test_dir = '../../data/seed/mapspam/spam2017v2r1_ssa_prod'\n",
+ "test_dir = \"../../data/seed/mapspam/spam2017v2r1_ssa_prod\"\n",
"test_list2 = [os.path.join(test_dir, f) for f in os.listdir(test_dir)]\n",
"\n",
"h3grid = pd.concat(list(gen_raster_h3(test_list2, 4)))\n",
- "h3grid = gpd.GeoDataFrame(h3grid, geometry=[Polygon(h3.h3_to_geo_boundary(h, geo_json=True)) for h in h3grid.index])\n",
+ "h3grid = gpd.GeoDataFrame(\n",
+ " h3grid, geometry=[Polygon(h3.h3_to_geo_boundary(h, geo_json=True)) for h in h3grid.index]\n",
+ ")\n",
"h3grid.plot()"
],
"outputs": [
diff --git a/data/notebooks/Lab/0_7_Calculate_bins_for_contextual_layers.ipynb b/data/notebooks/Lab/0_7_Calculate_bins_for_contextual_layers.ipynb
index 312c41594..18bce9c26 100644
--- a/data/notebooks/Lab/0_7_Calculate_bins_for_contextual_layers.ipynb
+++ b/data/notebooks/Lab/0_7_Calculate_bins_for_contextual_layers.ipynb
@@ -7,11 +7,7 @@
"metadata": {},
"outputs": [],
"source": [
- "import json\n",
- "import os\n",
- "\n",
"import matplotlib.pyplot as plt\n",
- "import numpy as np\n",
"import pandas as pd\n",
"from mapclassify import NaturalBreaks\n",
"from psycopg import connect\n",
diff --git a/data/notebooks/Lab/0_data_geocoding.ipynb b/data/notebooks/Lab/0_data_geocoding.ipynb
index 115e1f67b..41dad0099 100644
--- a/data/notebooks/Lab/0_data_geocoding.ipynb
+++ b/data/notebooks/Lab/0_data_geocoding.ipynb
@@ -102,16 +102,11 @@
}
],
"source": [
- "import pandas as pd\n",
- "import geopandas as gpd\n",
- "import numpy as np\n",
"import os\n",
- "import shapely.wkt\n",
- "import folium\n",
"\n",
- "from collections import OrderedDict\n",
- "import requests\n",
- "import time\n",
+ "import folium\n",
+ "import geopandas as gpd\n",
+ "import pandas as pd\n",
"from shapely.geometry import Point"
]
},
@@ -130,25 +125,27 @@
"metadata": {},
"outputs": [],
"source": [
- "def visualise(data, z=3, loc=[0,0], color=\"#f69\", tooltip_property=None):\n",
+ "def visualise(data, z=3, loc=[0, 0], color=\"#f69\", tooltip_property=None):\n",
" \"\"\"Maps a list of geojson features\"\"\"\n",
" # Adapted from docs: https://geopandas.readthedocs.io/en/latest/gallery/polygon_plotting_with_folium.html\n",
"\n",
- " m = folium.Map(location=loc, zoom_start=z, tiles='CartoDB positron')\n",
- " \n",
+ " m = folium.Map(location=loc, zoom_start=z, tiles=\"CartoDB positron\")\n",
+ "\n",
" for d in data:\n",
- " geo_j = folium.GeoJson(data=d['geometry'],\n",
- " style_function=lambda x: {\n",
- " 'fillColor': color,\n",
- " 'color': color,\n",
- " 'weight': 1.5,\n",
- " })\n",
+ " geo_j = folium.GeoJson(\n",
+ " data=d[\"geometry\"],\n",
+ " style_function=lambda x: {\n",
+ " \"fillColor\": color,\n",
+ " \"color\": color,\n",
+ " \"weight\": 1.5,\n",
+ " },\n",
+ " )\n",
"\n",
" ## No popup yet\n",
" if tooltip_property:\n",
- " prop = d['properties'].get(tooltip_property, '')\n",
+ " prop = d[\"properties\"].get(tooltip_property, \"\")\n",
" folium.Popup(str(prop)).add_to(geo_j)\n",
- " \n",
+ "\n",
" geo_j.add_to(m)\n",
"\n",
" return m"
@@ -184,7 +181,7 @@
}
],
"source": [
- "point = GeolocateAddress(query='india')\n",
+ "point = GeolocateAddress(query=\"india\")\n",
"point.point"
]
},
@@ -247,8 +244,8 @@
}
],
"source": [
- "input_dir = '../../data/raw/'\n",
- "files = [input_dir+f for f in os.listdir(input_dir) if '.csv' in f]\n",
+ "input_dir = \"../../data/raw/\"\n",
+ "files = [input_dir + f for f in os.listdir(input_dir) if \".csv\" in f]\n",
"files"
]
},
@@ -403,7 +400,7 @@
],
"source": [
"# check unique field in location type\n",
- "set(input_data['Location type'])"
+ "set(input_data[\"Location type\"])"
]
},
{
@@ -412,10 +409,14 @@
"metadata": {},
"outputs": [],
"source": [
- "unknown_locations = input_data[input_data['Location type']=='Unknown']\n",
- "os_facility = input_data[input_data['Location type']=='Origin supplier facility (warehouse, silo, mill, etc.)']\n",
- "oc = input_data[input_data['Location type']=='Origin country']\n",
- "os_production = input_data[input_data['Location type']=='Point of production (farm, ranch, plantation, etc.)']"
+ "unknown_locations = input_data[input_data[\"Location type\"] == \"Unknown\"]\n",
+ "os_facility = input_data[\n",
+ " input_data[\"Location type\"] == \"Origin supplier facility (warehouse, silo, mill, etc.)\"\n",
+ "]\n",
+ "oc = input_data[input_data[\"Location type\"] == \"Origin country\"]\n",
+ "os_production = input_data[\n",
+ " input_data[\"Location type\"] == \"Point of production (farm, ranch, plantation, etc.)\"\n",
+ "]"
]
},
{
@@ -438,12 +439,14 @@
}
],
"source": [
- "print(f'The total length of the input data file is: {len(input_data)} rows/locations.\\n')\n",
- "print(f\"\"\"For those locations, there are {len(unknown_locations)} unknown locations,\n",
+ "print(f\"The total length of the input data file is: {len(input_data)} rows/locations.\\n\")\n",
+ "print(\n",
+ " f\"\"\"For those locations, there are {len(unknown_locations)} unknown locations,\n",
"{len(os_facility)} Origin supplier facility (warehouse, silo, mill, etc locations,\n",
"{len(oc)} 'Origin country' locations and \n",
"{len(os_production)} 'Point of production (farm, ranch, plantation, etc.)' locations\n",
- "\"\"\")\n"
+ "\"\"\"\n",
+ ")"
]
},
{
@@ -591,14 +594,14 @@
" for i in range(0, len(gdf)):\n",
" row = gdf.iloc[i]\n",
" rowIndex = gdf.index[i]\n",
- " lat = row['Latitude']\n",
- " lng = row['Longitude']\n",
+ " lat = row[\"Latitude\"]\n",
+ " lng = row[\"Longitude\"]\n",
" point = (lng, lat)\n",
" geom = Point(point)\n",
" geoms.append(geom)\n",
- " #gdf.loc[rowIndex, 'Geometry_wkt'] = geom.to_wkt()\n",
- " gdf.loc[rowIndex, 'Accuracy'] = 'High'\n",
- " gdf['Geometry'] = geoms\n",
+ " # gdf.loc[rowIndex, 'Geometry_wkt'] = geom.to_wkt()\n",
+ " gdf.loc[rowIndex, \"Accuracy\"] = \"High\"\n",
+ " gdf[\"Geometry\"] = geoms\n",
" return gdf"
]
},
@@ -871,7 +874,7 @@
"metadata": {},
"outputs": [],
"source": [
- "geolocated_gdf = geolocated_gdf.set_geometry('Geometry')"
+ "geolocated_gdf = geolocated_gdf.set_geometry(\"Geometry\")"
]
},
{
@@ -1072,6 +1075,7 @@
],
"source": [
"import json\n",
+ "\n",
"gjson = json.loads(geolocated_gdf.to_json())\n",
"gjson"
]
@@ -1096,7 +1100,7 @@
}
],
"source": [
- "visualise(gjson['features'], tooltip_property='Location type')"
+ "visualise(gjson[\"features\"], tooltip_property=\"Location type\")"
]
},
{
@@ -1481,7 +1485,9 @@
}
],
"source": [
- "processing_facility['Full Address'] = processing_facility['Address'] + ', ' + processing_facility['Country']\n",
+ "processing_facility[\"Full Address\"] = (\n",
+ " processing_facility[\"Address\"] + \", \" + processing_facility[\"Country\"]\n",
+ ")\n",
"processing_facility.head()"
]
},
@@ -1505,7 +1511,7 @@
"source": [
"### Why could try and do an .apply() operation on the whole df\n",
"\n",
- "address = processing_facility.iloc[0]['Full Address']\n",
+ "address = processing_facility.iloc[0][\"Full Address\"]\n",
"geo_loc_test = GeolocateAddress(query=address)"
]
},
@@ -1571,7 +1577,7 @@
}
],
"source": [
- "gdf = gpd.GeoDataFrame.from_features(geo_loc_test.polygon_json, crs='epsg:4326')\n",
+ "gdf = gpd.GeoDataFrame.from_features(geo_loc_test.polygon_json, crs=\"epsg:4326\")\n",
"gdf"
]
},
@@ -1595,8 +1601,8 @@
}
],
"source": [
- "m = folium.Map(location=[0,0],tiles=\"cartodbpositron\", zoom_start=5)\n",
- "folium.GeoJson(data=gdf[\"geometry\"]).add_to(m) \n",
+ "m = folium.Map(location=[0, 0], tiles=\"cartodbpositron\", zoom_start=5)\n",
+ "folium.GeoJson(data=gdf[\"geometry\"]).add_to(m)\n",
"m"
]
},
@@ -1747,7 +1753,7 @@
],
"source": [
"row = processing_facility.iloc[0]\n",
- "adress_country = row['Country']\n",
+ "adress_country = row[\"Country\"]\n",
"adress_country"
]
},
@@ -1834,7 +1840,7 @@
}
],
"source": [
- "gdf = gpd.GeoDataFrame.from_features(geo_loc_test.polygon_json, crs='epsg:4326')\n",
+ "gdf = gpd.GeoDataFrame.from_features(geo_loc_test.polygon_json, crs=\"epsg:4326\")\n",
"gdf"
]
},
@@ -1858,8 +1864,8 @@
}
],
"source": [
- "m = folium.Map(location=[0,0],tiles=\"cartodbpositron\", zoom_start=3)\n",
- "folium.GeoJson(data=gdf[\"geometry\"]).add_to(m) \n",
+ "m = folium.Map(location=[0, 0], tiles=\"cartodbpositron\", zoom_start=3)\n",
+ "folium.GeoJson(data=gdf[\"geometry\"]).add_to(m)\n",
"m"
]
},
@@ -2242,49 +2248,48 @@
"accuracy_list = []\n",
"for i in range(0, len(input_data)):\n",
" row = input_data.iloc[i]\n",
- " if row['Location type'] == 'Unknown' or row['Location type'] =='Origin country':\n",
- " country_name = row['Country']\n",
+ " if row[\"Location type\"] == \"Unknown\" or row[\"Location type\"] == \"Origin country\":\n",
+ " country_name = row[\"Country\"]\n",
" try:\n",
" geolocation = GeolocateAddress(query=country_name)\n",
- " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs='epsg:4326')\n",
- " geom = gdf['geometry'].iloc[0]\n",
- " accuracy = 'Low'\n",
+ " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs=\"epsg:4326\")\n",
+ " geom = gdf[\"geometry\"].iloc[0]\n",
+ " accuracy = \"Low\"\n",
" except:\n",
- " print(f'Geolocation failed for {country_name}')\n",
- " geom = 'None'\n",
- " accuracy = 'None'\n",
- " if row['Location type'] == 'Origin supplier facility (warehouse, silo, mill, etc.)':\n",
+ " print(f\"Geolocation failed for {country_name}\")\n",
+ " geom = \"None\"\n",
+ " accuracy = \"None\"\n",
+ " if row[\"Location type\"] == \"Origin supplier facility (warehouse, silo, mill, etc.)\":\n",
" try:\n",
- " adress_count = row['Address'] + ', ' + row['Country']\n",
+ " adress_count = row[\"Address\"] + \", \" + row[\"Country\"]\n",
" geolocation = GeolocateAddress(query=adress_count)\n",
- " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs='epsg:4326')\n",
- " geom = gdf['geometry'].iloc[0]\n",
- " accuracy = 'Medium'\n",
+ " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs=\"epsg:4326\")\n",
+ " geom = gdf[\"geometry\"].iloc[0]\n",
+ " accuracy = \"Medium\"\n",
" except:\n",
- " print(f'Geolocation failed for row {i}')\n",
+ " print(f\"Geolocation failed for row {i}\")\n",
" try:\n",
- " print('trying for country...')\n",
- " country_name = row['Country']\n",
+ " print(\"trying for country...\")\n",
+ " country_name = row[\"Country\"]\n",
" geolocation = GeolocateAddress(query=country_name)\n",
- " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs='epsg:4326')\n",
- " geom = gdf['geometry'].iloc[0]\n",
- " accuracy = 'Low'\n",
+ " gdf = gpd.GeoDataFrame.from_features(geolocation.polygon_json, crs=\"epsg:4326\")\n",
+ " geom = gdf[\"geometry\"].iloc[0]\n",
+ " accuracy = \"Low\"\n",
" except:\n",
- " print(f'Geolocation failed for {country_name}')\n",
- " geom = 'None'\n",
- " accuracy= 'None'\n",
- " \n",
- " if row['Location type'] == 'Point of production (farm, ranch, plantation, etc.)':\n",
- " lat = row['Latitude']\n",
- " lng = row['Longitude']\n",
- " #point = (lat, lng)\n",
+ " print(f\"Geolocation failed for {country_name}\")\n",
+ " geom = \"None\"\n",
+ " accuracy = \"None\"\n",
+ "\n",
+ " if row[\"Location type\"] == \"Point of production (farm, ranch, plantation, etc.)\":\n",
+ " lat = row[\"Latitude\"]\n",
+ " lng = row[\"Longitude\"]\n",
+ " # point = (lat, lng)\n",
" point = (lng, lat)\n",
" geom = Point(point)\n",
- " accuracy = 'High'\n",
- " \n",
+ " accuracy = \"High\"\n",
+ "\n",
" geometry_list.append(geom)\n",
- " accuracy_list.append(accuracy)\n",
- " "
+ " accuracy_list.append(accuracy)"
]
},
{
@@ -2304,10 +2309,12 @@
}
],
"source": [
- "print(f\"\"\"\n",
+ "print(\n",
+ " f\"\"\"\n",
"lenght of geocoded locations: {len(geometry_list)},\n",
"lenght of input data: {len(input_data)}\n",
- "\"\"\")"
+ "\"\"\"\n",
+ ")"
]
},
{
@@ -2447,8 +2454,8 @@
}
],
"source": [
- "input_data['Geometry'] = geometry_list\n",
- "input_data['Accuracy'] = accuracy_list\n",
+ "input_data[\"Geometry\"] = geometry_list\n",
+ "input_data[\"Accuracy\"] = accuracy_list\n",
"input_data.head()"
]
},
@@ -2459,8 +2466,8 @@
"outputs": [],
"source": [
"gdf.to_file(\n",
- " '../Processed_data/located_lg_data_polygon.shp',\n",
- " driver='ESRI Shapefile',\n",
+ " \"../Processed_data/located_lg_data_polygon.shp\",\n",
+ " driver=\"ESRI Shapefile\",\n",
")"
]
},
@@ -2519,8 +2526,8 @@
}
],
"source": [
- "#check None geometries\n",
- "input_data[input_data['Geometry']=='None']"
+ "# check None geometries\n",
+ "input_data[input_data[\"Geometry\"] == \"None\"]"
]
},
{
@@ -2529,7 +2536,7 @@
"metadata": {},
"outputs": [],
"source": [
- "gdf = gpd.GeoDataFrame(input_data, geometry='Geometry')"
+ "gdf = gpd.GeoDataFrame(input_data, geometry=\"Geometry\")"
]
},
{
@@ -2538,9 +2545,9 @@
"metadata": {},
"outputs": [],
"source": [
- "gdf_polygon = gdf[gdf['Geometry'].apply(lambda x : x.type!='Point' )]\n",
- "gdf_point = gdf[gdf['Geometry'].apply(lambda x : x.type=='Point' )]\n",
- "gdf_polygon = gdf_polygon[gdf_polygon['Geometry'].apply(lambda x : x.type!='LineString' )]\n"
+ "gdf_polygon = gdf[gdf[\"Geometry\"].apply(lambda x: x.type != \"Point\")]\n",
+ "gdf_point = gdf[gdf[\"Geometry\"].apply(lambda x: x.type == \"Point\")]\n",
+ "gdf_polygon = gdf_polygon[gdf_polygon[\"Geometry\"].apply(lambda x: x.type != \"LineString\")]"
]
},
{
@@ -2549,8 +2556,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#check the linestring data retrieved\n",
- "gdf_LS = gdf[gdf['Geometry'].apply(lambda x : x.type=='LineString' )]"
+ "# check the linestring data retrieved\n",
+ "gdf_LS = gdf[gdf[\"Geometry\"].apply(lambda x: x.type == \"LineString\")]"
]
},
{
@@ -2636,8 +2643,8 @@
"outputs": [],
"source": [
"gdf_point.to_file(\n",
- " '../Processed_data/located_lg_data_point_v2.shp',\n",
- " driver='ESRI Shapefile',\n",
+ " \"../Processed_data/located_lg_data_point_v2.shp\",\n",
+ " driver=\"ESRI Shapefile\",\n",
")"
]
},
@@ -2648,8 +2655,8 @@
"outputs": [],
"source": [
"gdf_polygon.to_file(\n",
- " '../Processed_data/located_lg_data_polygon_v2.shp',\n",
- " driver='ESRI Shapefile',\n",
+ " \"../Processed_data/located_lg_data_polygon_v2.shp\",\n",
+ " driver=\"ESRI Shapefile\",\n",
")"
]
},
diff --git a/data/notebooks/Lab/10_1_Met_v01_Water_indicator_coeficients_csv.ipynb b/data/notebooks/Lab/10_1_Met_v01_Water_indicator_coeficients_csv.ipynb
index 3111a2b49..e24482059 100644
--- a/data/notebooks/Lab/10_1_Met_v01_Water_indicator_coeficients_csv.ipynb
+++ b/data/notebooks/Lab/10_1_Met_v01_Water_indicator_coeficients_csv.ipynb
@@ -25,7 +25,6 @@
},
"outputs": [],
"source": [
- "import numpy as np\n",
"import pandas as pd"
]
},
@@ -190,7 +189,9 @@
"metadata": {},
"outputs": [],
"source": [
- "df_long.to_csv(\"../../indicator_coefficient_importer/data/bwfp_indicator_coefficients.csv\", index=False)"
+ "df_long.to_csv(\n",
+ " \"../../indicator_coefficient_importer/data/bwfp_indicator_coefficients.csv\", index=False\n",
+ ")"
]
}
],
diff --git a/data/notebooks/Lab/10_2_kernel_impl_explorations.ipynb b/data/notebooks/Lab/10_2_kernel_impl_explorations.ipynb
index ecd11e624..2eabcf2c7 100644
--- a/data/notebooks/Lab/10_2_kernel_impl_explorations.ipynb
+++ b/data/notebooks/Lab/10_2_kernel_impl_explorations.ipynb
@@ -6,10 +6,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import numpy as np\n",
+ "import cv2\n",
"import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
"import rasterio as rio\n",
- "import cv2\n",
"import scipy"
]
},
@@ -40,7 +40,7 @@
}
],
"source": [
- "plt.imshow(cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(70,70)))"
+ "plt.imshow(cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(70, 70)))"
]
},
{
@@ -54,7 +54,6 @@
" meta = src.meta.copy()\n",
" transform = src.transform\n",
" arr = src.read(1)\n",
- " orig_crs = src.crs\n",
" # km per degree near the ecuator. At high lats this will bite us in the ass\n",
" # The issue here is that the kernel size should vary depending on the raster latitude and proj\n",
" # for now we will asume that the error for high lat rasters is ok but we should explore a fix.\n",
@@ -62,9 +61,11 @@
"\n",
" y_size_km = -transform[4] * 111 # 1 deg ~~ 111 km at ecuator\n",
" radius_in_pixels = int(radius // y_size_km)\n",
- " kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(radius_in_pixels, radius_in_pixels))\n",
- " \n",
- " # apply the buffer using opencv filter function. \n",
+ " kernel = cv2.getStructuringElement(\n",
+ " cv2.MORPH_ELLIPSE, ksize=(radius_in_pixels, radius_in_pixels)\n",
+ " )\n",
+ "\n",
+ " # apply the buffer using opencv filter function.\n",
" # It calculates the cross-croletation instead of the convolution but\n",
" # since we are using a simetric kernel it does not matter.\n",
" # Also it is 100x faster than the scipy convolve ¯\\_(ツ)_/¯\n",
@@ -117,8 +118,11 @@
],
"source": [
"%%timeit\n",
- "main(\"../../../../Hansen_GFC-2020-v1.8_lossyear_20S_060W_10km.tif\",\n",
- " \"../../../../Hansen_GFC-2020-v1.8_lossyear_20S_060W_10km_buff.tif\", 50)"
+ "main(\n",
+ " \"../../../../Hansen_GFC-2020-v1.8_lossyear_20S_060W_10km.tif\",\n",
+ " \"../../../../Hansen_GFC-2020-v1.8_lossyear_20S_060W_10km_buff.tif\",\n",
+ " 50,\n",
+ ")"
]
},
{
@@ -142,11 +146,11 @@
"radius_in_pixels = int(50 // y_size_km)\n",
"kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(radius_in_pixels, radius_in_pixels))\n",
"\n",
- "# apply the buffer using opencv filter function. \n",
+ "# apply the buffer using opencv filter function.\n",
"# It calculates the cross-croletation instead of the convolution but\n",
"# since we are using a simetric kernel it does not matter.\n",
"# Also it is 100x faster than the scipy convolve ¯\\_(ツ)_/¯\n",
- "res_buff = cv2.filter2D(arr, ddepth=-1, kernel=kernel) / np.sum(kernel)\n"
+ "res_buff = cv2.filter2D(arr, ddepth=-1, kernel=kernel) / np.sum(kernel)"
]
},
{
@@ -195,9 +199,11 @@
"metadata": {},
"outputs": [],
"source": [
- "k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(100,100))\n",
+ "k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(100, 100))\n",
"\n",
- "res = focal_mean(\"../../../../SpeciesRichness_IDN_2021-01-01-2022-01-01.tif\", k, \"../../../../ktest_200.tig\")"
+ "res = focal_mean(\n",
+ " \"../../../../SpeciesRichness_IDN_2021-01-01-2022-01-01.tif\", k, \"../../../../ktest_200.tig\"\n",
+ ")"
]
},
{
@@ -207,7 +213,7 @@
"outputs": [],
"source": [
"src = rio.open(\"../../../../SpeciesRichness_IDN_2021-01-01-2022-01-01.tif\")\n",
- "data = src.read(1)\n"
+ "data = src.read(1)"
]
},
{
@@ -303,11 +309,11 @@
"metadata": {},
"outputs": [],
"source": [
- "with rio.open(\"../../../../ktest_200_cv.tif\",\"w\", **profile) as dst:\n",
- " dst.write(res_cv[np.newaxis,:])\n",
+ "with rio.open(\"../../../../ktest_200_cv.tif\", \"w\", **profile) as dst:\n",
+ " dst.write(res_cv[np.newaxis, :])\n",
"\n",
"with rio.open(\"../../../../ktest_200.tif\", \"w\", **profile) as dst:\n",
- " dst.write(res[np.newaxis,:])"
+ " dst.write(res[np.newaxis, :])"
]
},
{
@@ -346,7 +352,7 @@
"metadata": {},
"outputs": [],
"source": [
- "y_size_km = -gt[4]*111"
+ "y_size_km = -gt[4] * 111"
]
},
{
@@ -398,15 +404,6 @@
"crs.is_geographic"
]
},
- {
- "cell_type": "code",
- "execution_count": 93,
- "metadata": {},
- "outputs": [],
- "source": [
- "from rasterio.warp import calculate_default_transform, reproject, Resampling\n"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
diff --git a/data/notebooks/Lab/10_Met_v0.1_results.ipynb b/data/notebooks/Lab/10_Met_v0.1_results.ipynb
index 52f02bbe9..91e6b2b5e 100644
--- a/data/notebooks/Lab/10_Met_v0.1_results.ipynb
+++ b/data/notebooks/Lab/10_Met_v0.1_results.ipynb
@@ -36,17 +36,16 @@
"source": [
"# import libraries\n",
"import geopandas as gpd\n",
- "from rasterstats import zonal_stats\n",
"import rasterio as rio\n",
+ "from rasterstats import zonal_stats\n",
"\n",
"!pip install h3ronpy h3pandas --q\n",
- "from h3ronpy import raster\n",
+ "import os\n",
+ "\n",
"import h3\n",
- "import h3pandas\n",
"import pandas as pd\n",
- "from shapely.geometry import Polygon\n",
- "\n",
- "import os"
+ "from h3ronpy import raster\n",
+ "from shapely.geometry import Polygon"
]
},
{
@@ -57,10 +56,10 @@
"outputs": [],
"source": [
"import numpy as np\n",
- "from PIL import Image\n",
"import scipy.ndimage\n",
"import scipy.signal\n",
- "from osgeo import gdal"
+ "from osgeo import gdal\n",
+ "from PIL import Image"
]
},
{
@@ -70,7 +69,7 @@
"metadata": {},
"outputs": [],
"source": [
- "def buffer_stats(raster_path, vector_path, buffer=50000, stat_='sum', all_touched = True):\n",
+ "def buffer_stats(raster_path, vector_path, buffer=50000, stat_=\"sum\", all_touched=True):\n",
" \"\"\"\n",
" inputs:\n",
" -------------\n",
@@ -78,73 +77,71 @@
" vector_path: path to point file in EPSG:4326\n",
" buffer: distance in metres for coputing the buffer\n",
" stats: stadistics to compute\n",
- " \n",
+ "\n",
" output\n",
" -------\n",
" array with statistics\"\"\"\n",
- " \n",
- " #open vector file\n",
+ "\n",
+ " # open vector file\n",
" gdf = gpd.read_file(vector_path)\n",
- " #check projection\n",
- " #if gdf.crs != True:\n",
+ " # check projection\n",
+ " # if gdf.crs != True:\n",
" # print(gdf.crs)\n",
" # #project\n",
" # print('Dataset missing projection. Please assign one!')\n",
- " if gdf.crs and gdf.crs == 'EPSG:4326':\n",
- " #reproject\n",
- " gdf_3857 = gdf.to_crs('EPSG:3857')\n",
+ " if gdf.crs and gdf.crs == \"EPSG:4326\":\n",
+ " # reproject\n",
+ " gdf_3857 = gdf.to_crs(\"EPSG:3857\")\n",
" ## TODO:add other validations\n",
- " \n",
"\n",
- " #get buffer\n",
+ " # get buffer\n",
" gdf_3857_buffer = gdf_3857.buffer(buffer)\n",
- " #reproject back to epsg4326\n",
- " gdf_4326_buffer = gdf_3857_buffer.to_crs('EPSG:4326')\n",
- " #get statistics\n",
+ " # reproject back to epsg4326\n",
+ " gdf_4326_buffer = gdf_3857_buffer.to_crs(\"EPSG:4326\")\n",
+ " # get statistics\n",
" vizz_stats = []\n",
" for geom in gdf_4326_buffer:\n",
- " stats = zonal_stats(geom,\n",
- " raster_path,\n",
- " stats=stat_,\n",
- " all_touched = all_touched\n",
- " )\n",
- " stat_sum = stats[0]['sum']\n",
+ " stats = zonal_stats(geom, raster_path, stats=stat_, all_touched=all_touched)\n",
+ " stat_sum = stats[0][\"sum\"]\n",
" vizz_stats.append(stat_sum)\n",
- " #add stats in dataframe\n",
- " gdf['estimated']=vizz_stats\n",
+ " # add stats in dataframe\n",
+ " gdf[\"estimated\"] = vizz_stats\n",
" return gdf\n",
"\n",
- "def raster_to_h3(raster_path, resolution=6, field='value', plot=False):\n",
+ "\n",
+ "def raster_to_h3(raster_path, resolution=6, field=\"value\", plot=False):\n",
" \"\"\"convert raster to h3 with a given h3 resolution. Returns a gdf with the h3 geometries.\"\"\"\n",
- " \n",
- " with rio.open(raster_path) as src:\n",
- " gdf = raster.raster_to_geodataframe(src.read(1), src.transform, h3_resolution=resolution, nodata_value=src.profile['nodata'], compacted=False)\n",
"\n",
- " gdf = gdf.rename(columns={'value':field})\n",
+ " with rio.open(raster_path) as src:\n",
+ " gdf = raster.raster_to_geodataframe(\n",
+ " src.read(1),\n",
+ " src.transform,\n",
+ " h3_resolution=resolution,\n",
+ " nodata_value=src.profile[\"nodata\"],\n",
+ " compacted=False,\n",
+ " )\n",
+ "\n",
+ " gdf = gdf.rename(columns={\"value\": field})\n",
" if plot:\n",
" gdf.plot(field)\n",
- " gdf['h3index'] = gdf['h3index'].apply(hex)\n",
- " \n",
+ " gdf[\"h3index\"] = gdf[\"h3index\"].apply(hex)\n",
+ "\n",
" return gdf\n",
- " \n",
- " \n",
- "def focal_mean(raster_path, \n",
- " kernel_path, \n",
- " output_path):\n",
- " #open deforestation array\n",
+ "\n",
+ "\n",
+ "def focal_mean(raster_path, kernel_path, output_path):\n",
+ " # open deforestation array\n",
" ds_def = gdal.Open(raster_path)\n",
" def_array = np.array(ds_def.GetRasterBand(1).ReadAsArray())\n",
- " \n",
- " #open kernel path\n",
+ "\n",
+ " # open kernel path\n",
" ds_kernnel = gdal.Open(kernel_path)\n",
" kernnel_array = np.array(ds_kernnel.GetRasterBand(1).ReadAsArray())\n",
- " \n",
- " #perform the focal mean with convolute\n",
+ "\n",
+ " # perform the focal mean with convolute\n",
" result_fm = scipy.ndimage.convolve(def_array, weights=kernnel_array) / kernnel_array.size\n",
" im = Image.fromarray(result_fm)\n",
- " im.save(output_path)\n",
- " \n",
- " "
+ " im.save(output_path)"
]
},
{
@@ -212,23 +209,22 @@
"outputs": [],
"source": [
"# get deforestation in buffer zones\n",
- "\n",
- "vector_path = '../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp'\n",
+ "vector_path = \"../../datasets/processed/Satelligence_data/test_rasters_2/satelligence_mills_4326_50kmbuffer.shp\"\n",
"resolution = 6\n",
"\n",
"gdf_vector = gpd.read_file(vector_path)\n",
- "clean_gdf = gdf_vector[['gfw_fid','deforestat','geometry']]\n",
+ "clean_gdf = gdf_vector[[\"gfw_fid\", \"deforestat\", \"geometry\"]]\n",
"\n",
"_sum_calculated = []\n",
"for i, row in clean_gdf.iterrows():\n",
- " filtered_gdf = clean_gdf[i:i+1]\n",
- " #convert to h3\n",
+ " filtered_gdf = clean_gdf[i : i + 1]\n",
+ " # convert to h3\n",
" h3_gdf = filtered_gdf.h3.polyfill_resample(resolution)\n",
- " h3index_list = [f'0x{h3index}' for h3index in h3_gdf.index]\n",
- " _sum = merge_gdf[merge_gdf['h3index'].isin(h3index_list)]['deforestation_km2'].sum()*100\n",
+ " h3index_list = [f\"0x{h3index}\" for h3index in h3_gdf.index]\n",
+ " _sum = merge_gdf[merge_gdf[\"h3index\"].isin(h3index_list)][\"deforestation_km2\"].sum() * 100\n",
" _sum_calculated.append(_sum)\n",
- " \n",
- "#_sum_calculated"
+ "\n",
+ "# _sum_calculated"
]
},
{
@@ -238,11 +234,14 @@
"metadata": {},
"outputs": [],
"source": [
- "#zonal statistics raster\n",
- "stats_ = buffer_stats('../../datasets/raw/methodology_results/Deforestation_IDN_2021-01-01-2022-01-01.tif',\n",
- " '../../datasets/processed/palm_oil_mills/satelligence_mills_4326_point.shp',\n",
- " buffer=50000,\n",
- " stat_='sum', all_touched = False)"
+ "# zonal statistics raster\n",
+ "stats_ = buffer_stats(\n",
+ " \"../../datasets/raw/methodology_results/Deforestation_IDN_2021-01-01-2022-01-01.tif\",\n",
+ " \"../../datasets/processed/palm_oil_mills/satelligence_mills_4326_point.shp\",\n",
+ " buffer=50000,\n",
+ " stat_=\"sum\",\n",
+ " all_touched=False,\n",
+ ")"
]
},
{
@@ -252,7 +251,7 @@
"metadata": {},
"outputs": [],
"source": [
- "def_raster = list(stats_['estimated']*6.69019042035408517*6.69019042035408517* 0.0001)"
+ "def_raster = list(stats_[\"estimated\"] * 6.69019042035408517 * 6.69019042035408517 * 0.0001)"
]
},
{
@@ -314,20 +313,23 @@
"# ultiply def area in hectares\n",
"# then filter all locations where there is production\n",
"\n",
- "def_density = '../../datasets/raw/methodology_results/Deforestation_IDN_2021-01-01-2022-01-01_density.tif'\n",
- "def_area_ha = '../../datasets/raw/methodology_results/update/Deforestation_IDN_2021-01-01-2022-01-01_area_ha.tif'\n",
- "kernel_50km = '../../datasets/raw/methodology_results/test_location_buffer_raster.tif'\n",
+ "def_density = (\n",
+ " \"../../datasets/raw/methodology_results/Deforestation_IDN_2021-01-01-2022-01-01_density.tif\"\n",
+ ")\n",
+ "def_area_ha = \"../../datasets/raw/methodology_results/update/Deforestation_IDN_2021-01-01-2022-01-01_area_ha.tif\"\n",
+ "kernel_50km = \"../../datasets/raw/methodology_results/test_location_buffer_raster.tif\"\n",
"\n",
- "# pixel area in hectares = 8633.766614450342 \n",
- "#calculate deforestation area\n",
+ "# pixel area in hectares = 8633.766614450342\n",
+ "# calculate deforestation area\n",
"!gdal_calc.py --calc \"A*8633.766614450342\" --format GTiff --type Float32 --NoDataValue 0.0 -A $def_density --A_band 1 --outfile $def_area_ha;\n",
"\n",
"\n",
"## generate kernel\n",
- "focal_mean(raster_path = def_area_ha, \n",
- " kernel_path = kernel_50km, \n",
- " output_path = '../../datasets/raw/methodology_results/update/deforestation_50km_kernel_v2.tif')\n",
- "\n"
+ "focal_mean(\n",
+ " raster_path=def_area_ha,\n",
+ " kernel_path=kernel_50km,\n",
+ " output_path=\"../../datasets/raw/methodology_results/update/deforestation_50km_kernel_v2.tif\",\n",
+ ")"
]
},
{
@@ -387,7 +389,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#set projection\n",
+ "# set projection\n",
"\n",
"## change extent and set projection\n",
"\n",
@@ -411,9 +413,11 @@
"source": [
"empty_array = np.zeros((2160, 4320))\n",
"im = Image.fromarray(empty_array)\n",
- "im.save('../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif')\n",
+ "im.save(\n",
+ " \"../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif\"\n",
+ ")\n",
"# geolocate with new extent\n",
- "!gdal_edit.py -a_srs EPSG:4326 -a_ulurll -180.0000000 90.0000000 179.9985600 90.0000000 -180.0000000 -89.9992800 -a_nodata -1 '../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif'\n"
+ "!gdal_edit.py -a_srs EPSG:4326 -a_ulurll -180.0000000 90.0000000 179.9985600 90.0000000 -180.0000000 -89.9992800 -a_nodata -1 '../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif'"
]
},
{
@@ -433,13 +437,22 @@
}
],
"source": [
- "all_ha_commodities = [file for file in os.listdir('../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff') if file.endswith('_A.tif')]\n",
- "\n",
- "for i in range(0,len(all_ha_commodities)):\n",
- " file = '../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/'+ all_ha_commodities[i]\n",
- " #print(f'Summing {all_ha_commodities[i]}...')\n",
+ "all_ha_commodities = [\n",
+ " file\n",
+ " for file in os.listdir(\n",
+ " \"../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff\"\n",
+ " )\n",
+ " if file.endswith(\"_A.tif\")\n",
+ "]\n",
+ "\n",
+ "for i in range(0, len(all_ha_commodities)):\n",
+ " file = (\n",
+ " \"../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/\"\n",
+ " + all_ha_commodities[i]\n",
+ " )\n",
+ " # print(f'Summing {all_ha_commodities[i]}...')\n",
" !gdal_calc.py --calc \"A+B\" --NoDataValue -1 --format GTiff --type Float32 --NoDataValue -1 -A ../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif --A_band 1 -B $file --outfile ../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif --q;\n",
- "print('Done!')"
+ "print(\"Done!\")"
]
},
{
@@ -458,7 +471,7 @@
}
],
"source": [
- "#clip data to area of interest\n",
+ "# clip data to area of interest\n",
"!gdal_translate -projwin 94.99998 6.10002 98.333313333 2.10002 -of GTiff ../../datasets/raw/methodology_results/harvest_area_mapspam/spam2010v2r0_global_harv_area.geotiff/empty.tif ../../datasets/raw/methodology_results/harvest_area_mapspam/harvest_area_sum_ha_clip.tif;"
]
},
@@ -681,46 +694,78 @@
}
],
"source": [
- "#translate density raster to h3\n",
- "rp_density = '../../datasets/raw/methodology_results/Deforestation_IDN_2021-01-01-2022-01-01_density.tif'\n",
- "rp_area = '../../datasets/processed/Satelligence_data/area_ratio/8_Areakm_clip_ind.tif'\n",
- "rp_oil_prod_t = '../../datasets/raw/methodology_results/spam_palm_oil_prod_clip.tif'\n",
- "rp_oil_ha = '../../datasets/raw/methodology_results/spam_palm_oil_ha_clip.tif'\n",
- "rp_all_comm_ha = '../../datasets/raw/methodology_results/harvest_area_mapspam/harvest_area_sum_ha_clip.tif'\n",
- "kernel_Def = '../../datasets/raw/methodology_results/update/deforestation_50km_kernel_v2.tif'\n",
+ "# translate density raster to h3\n",
+ "rp_density = (\n",
+ " \"../../datasets/raw/methodology_results/Deforestation_IDN_2021-01-01-2022-01-01_density.tif\"\n",
+ ")\n",
+ "rp_area = \"../../datasets/processed/Satelligence_data/area_ratio/8_Areakm_clip_ind.tif\"\n",
+ "rp_oil_prod_t = \"../../datasets/raw/methodology_results/spam_palm_oil_prod_clip.tif\"\n",
+ "rp_oil_ha = \"../../datasets/raw/methodology_results/spam_palm_oil_ha_clip.tif\"\n",
+ "rp_all_comm_ha = (\n",
+ " \"../../datasets/raw/methodology_results/harvest_area_mapspam/harvest_area_sum_ha_clip.tif\"\n",
+ ")\n",
+ "kernel_Def = \"../../datasets/raw/methodology_results/update/deforestation_50km_kernel_v2.tif\"\n",
"\n",
"\n",
"resolution = 6\n",
"\n",
"\n",
- "\n",
- "\n",
- "gdf_def_density = raster_to_h3(rp_density, resolution=resolution, field ='def_density', plot=True)\n",
+ "gdf_def_density = raster_to_h3(rp_density, resolution=resolution, field=\"def_density\", plot=True)\n",
"\n",
"# translate pixel area to h3 to compute pixel area/h3 area ratio\n",
- "#translate density raster to h3\n",
- "gdf_def_area = raster_to_h3(rp_area, resolution=resolution, field='pixel_area_km2')\n",
- "gdf_po_prod = raster_to_h3(rp_oil_prod_t, resolution=resolution, field='prod_t', plot=True)\n",
- "gdf_po_ha = raster_to_h3(rp_oil_ha, resolution=resolution, field='harvst_ha', plot=True)\n",
- "gdf_allcommodities_ha = raster_to_h3(rp_all_comm_ha, resolution=resolution, field='harvst_all_ha', plot=True)\n",
- "gdf_kernel_Def = raster_to_h3(kernel_Def, resolution=resolution, field='kernel_def_ha', plot=True)\n",
+ "# translate density raster to h3\n",
+ "gdf_def_area = raster_to_h3(rp_area, resolution=resolution, field=\"pixel_area_km2\")\n",
+ "gdf_po_prod = raster_to_h3(rp_oil_prod_t, resolution=resolution, field=\"prod_t\", plot=True)\n",
+ "gdf_po_ha = raster_to_h3(rp_oil_ha, resolution=resolution, field=\"harvst_ha\", plot=True)\n",
+ "gdf_allcommodities_ha = raster_to_h3(\n",
+ " rp_all_comm_ha, resolution=resolution, field=\"harvst_all_ha\", plot=True\n",
+ ")\n",
+ "gdf_kernel_Def = raster_to_h3(kernel_Def, resolution=resolution, field=\"kernel_def_ha\", plot=True)\n",
"\n",
"\n",
"## merge datasets\n",
"\n",
- "gdf_merge = gdf_po_prod.merge(gdf_po_ha, on='h3index', how='outer').merge(gdf_def_area, on='h3index', how='outer')[['h3index', 'pixel_area_km2', 'prod_t', 'harvst_ha', 'geometry']].merge(gdf_def_density, on='h3index', how='outer').merge(gdf_allcommodities_ha, on='h3index', how='outer')\n",
- "\n",
- "\n",
- "#clean merged dataset - get just one geometry\n",
- "\n",
- "gdf_merge = gdf_merge[['h3index','def_density', 'pixel_area_km2', 'prod_t', 'harvst_ha','harvst_all_ha','geometry_x']].rename(columns={'geometry_x':'geometry'})\n",
- "gdf_merge = gdf_merge.merge(gdf_kernel_Def, on='h3index', how='outer')[['h3index','def_density', 'pixel_area_km2', 'prod_t', 'harvst_ha','harvst_all_ha','kernel_def_ha','geometry_x']].rename(columns={'geometry_x':'geometry'})\n",
- "\n",
- "#calculate deforestation area \n",
- "gdf_merge['def_area_ha'] = gdf_merge['pixel_area_km2']*100*gdf_merge['def_density']\n",
- "gdf_merge['h3index'] = [h3index.split('x')[1] for h3index in gdf_merge['h3index']]\n",
- "gdf_merge['h3Area_km2'] = [h3.cell_area(h3index) for h3index in list(gdf_merge['h3index'])]\n",
- "gdf_merge['area_ratio'] = gdf_merge['h3Area_km2']/gdf_merge['pixel_area_km2']\n",
+ "gdf_merge = (\n",
+ " gdf_po_prod.merge(gdf_po_ha, on=\"h3index\", how=\"outer\")\n",
+ " .merge(gdf_def_area, on=\"h3index\", how=\"outer\")[\n",
+ " [\"h3index\", \"pixel_area_km2\", \"prod_t\", \"harvst_ha\", \"geometry\"]\n",
+ " ]\n",
+ " .merge(gdf_def_density, on=\"h3index\", how=\"outer\")\n",
+ " .merge(gdf_allcommodities_ha, on=\"h3index\", how=\"outer\")\n",
+ ")\n",
+ "\n",
+ "\n",
+ "# clean merged dataset - get just one geometry\n",
+ "\n",
+ "gdf_merge = gdf_merge[\n",
+ " [\n",
+ " \"h3index\",\n",
+ " \"def_density\",\n",
+ " \"pixel_area_km2\",\n",
+ " \"prod_t\",\n",
+ " \"harvst_ha\",\n",
+ " \"harvst_all_ha\",\n",
+ " \"geometry_x\",\n",
+ " ]\n",
+ "].rename(columns={\"geometry_x\": \"geometry\"})\n",
+ "gdf_merge = gdf_merge.merge(gdf_kernel_Def, on=\"h3index\", how=\"outer\")[\n",
+ " [\n",
+ " \"h3index\",\n",
+ " \"def_density\",\n",
+ " \"pixel_area_km2\",\n",
+ " \"prod_t\",\n",
+ " \"harvst_ha\",\n",
+ " \"harvst_all_ha\",\n",
+ " \"kernel_def_ha\",\n",
+ " \"geometry_x\",\n",
+ " ]\n",
+ "].rename(columns={\"geometry_x\": \"geometry\"})\n",
+ "\n",
+ "# calculate deforestation area\n",
+ "gdf_merge[\"def_area_ha\"] = gdf_merge[\"pixel_area_km2\"] * 100 * gdf_merge[\"def_density\"]\n",
+ "gdf_merge[\"h3index\"] = [h3index.split(\"x\")[1] for h3index in gdf_merge[\"h3index\"]]\n",
+ "gdf_merge[\"h3Area_km2\"] = [h3.cell_area(h3index) for h3index in list(gdf_merge[\"h3index\"])]\n",
+ "gdf_merge[\"area_ratio\"] = gdf_merge[\"h3Area_km2\"] / gdf_merge[\"pixel_area_km2\"]\n",
"\n",
"gdf_merge.head()"
]
@@ -741,8 +786,8 @@
}
],
"source": [
- "gdf_merge = gdf_merge.set_geometry('geometry')\n",
- "gdf_merge.to_file('../../datasets/raw/methodology_results/update/gdf_kernel_Deforestation_v2.shp')"
+ "gdf_merge = gdf_merge.set_geometry(\"geometry\")\n",
+ "gdf_merge.to_file(\"../../datasets/raw/methodology_results/update/gdf_kernel_Deforestation_v2.shp\")"
]
},
{
@@ -819,16 +864,18 @@
}
],
"source": [
- "point_location = gpd.read_file('../../datasets/raw/methodology_results/test_location_point.geojson')\n",
+ "point_location = gpd.read_file(\"../../datasets/raw/methodology_results/test_location_point.geojson\")\n",
"point_location = point_location.h3.geo_to_h3(6).reset_index(drop=False)\n",
"\n",
- "point_location = point_location[['h3_06']]\n",
+ "point_location = point_location[[\"h3_06\"]]\n",
"\n",
- "point_location['geometry'] = Polygon(h3.h3_to_geo_boundary(point_location['h3_06'][0], geo_json=True))\n",
- "point_location = point_location.set_geometry('geometry')\n",
- "#point_location.to_file('../../datasets/raw/methodology_results/test_location_point_h3_res6_v3.shp')\n",
+ "point_location[\"geometry\"] = Polygon(\n",
+ " h3.h3_to_geo_boundary(point_location[\"h3_06\"][0], geo_json=True)\n",
+ ")\n",
+ "point_location = point_location.set_geometry(\"geometry\")\n",
+ "# point_location.to_file('../../datasets/raw/methodology_results/test_location_point_h3_res6_v3.shp')\n",
"\n",
- "point_location\n"
+ "point_location"
]
},
{
@@ -910,12 +957,12 @@
}
],
"source": [
- "#obtain deforestation that takes places in that hexagon\n",
+ "# obtain deforestation that takes places in that hexagon\n",
"\n",
- "h3index_list = list(point_location['h3_06'])\n",
+ "h3index_list = list(point_location[\"h3_06\"])\n",
"\n",
- "def_point_loc = gdf_merge[gdf_merge['h3index'].isin(h3index_list)]\n",
- "def_point_loc\n"
+ "def_point_loc = gdf_merge[gdf_merge[\"h3index\"].isin(h3index_list)]\n",
+ "def_point_loc"
]
},
{
@@ -936,19 +983,23 @@
}
],
"source": [
- "#asumming volume equal to 1T\n",
- "land_impact_point = 1000*def_point_loc['harvst_ha'].sum()/def_point_loc['prod_t'].sum()\n",
- "print(f'land impact: {land_impact_point} ha')\n",
+ "# asumming volume equal to 1T\n",
+ "land_impact_point = 1000 * def_point_loc[\"harvst_ha\"].sum() / def_point_loc[\"prod_t\"].sum()\n",
+ "print(f\"land impact: {land_impact_point} ha\")\n",
"\n",
- "def_if = sum(def_point_loc['kernel_def_ha'] * def_point_loc['prod_t'])/ def_point_loc['prod_t'].sum()\n",
- "print(f'Dif: {def_if}')\n",
+ "def_if = (\n",
+ " sum(def_point_loc[\"kernel_def_ha\"] * def_point_loc[\"prod_t\"]) / def_point_loc[\"prod_t\"].sum()\n",
+ ")\n",
+ "print(f\"Dif: {def_if}\")\n",
"\n",
- "#Weighted mean total cropland area per pixel\n",
- "def_total_cropland_area_per_pixel = (def_point_loc['harvst_all_ha'] * def_point_loc['prod_t']).dropna().sum() /def_point_loc['prod_t'].sum()\n",
- "print(f'Mean cropland area: {def_total_cropland_area_per_pixel}')\n",
+ "# Weighted mean total cropland area per pixel\n",
+ "def_total_cropland_area_per_pixel = (\n",
+ " def_point_loc[\"harvst_all_ha\"] * def_point_loc[\"prod_t\"]\n",
+ ").dropna().sum() / def_point_loc[\"prod_t\"].sum()\n",
+ "print(f\"Mean cropland area: {def_total_cropland_area_per_pixel}\")\n",
"\n",
"def_impact_2 = (def_if * land_impact_point) / def_total_cropland_area_per_pixel\n",
- "print(f'Revised forest loss risk:{def_impact_2} ha')"
+ "print(f\"Revised forest loss risk:{def_impact_2} ha\")"
]
},
{
@@ -1045,17 +1096,20 @@
}
],
"source": [
- "agg_point = gpd.read_file('../../datasets/raw/methodology_results/test_location_point.geojson')\n",
- "agg_point = agg_point.to_crs('EPSG:3857')\n",
+ "agg_point = gpd.read_file(\"../../datasets/raw/methodology_results/test_location_point.geojson\")\n",
+ "agg_point = agg_point.to_crs(\"EPSG:3857\")\n",
"agg_point = agg_point.buffer(50000)\n",
- "agg_point = agg_point.to_crs('EPSG:4326')\n",
+ "agg_point = agg_point.to_crs(\"EPSG:4326\")\n",
"\n",
- "h3_agg_point = h3.polyfill(agg_point.geometry[0].__geo_interface__, 6, geo_json_conformant = True)\n",
+ "h3_agg_point = h3.polyfill(agg_point.geometry[0].__geo_interface__, 6, geo_json_conformant=True)\n",
"\n",
"agg_point_gdf = gpd.GeoDataFrame(h3_agg_point)\n",
- "agg_point_gdf = agg_point_gdf.rename(columns={0:'h3index'})\n",
- "agg_point_gdf['geometry'] = [Polygon(h3.h3_to_geo_boundary(h3index, geo_json=True)) for h3index in list(agg_point_gdf['h3index'])]\n",
- "#agg_point_gdf.to_file('../../datasets/raw/methodology_results/test_agg_point_h3_res6_v2.shp')\n",
+ "agg_point_gdf = agg_point_gdf.rename(columns={0: \"h3index\"})\n",
+ "agg_point_gdf[\"geometry\"] = [\n",
+ " Polygon(h3.h3_to_geo_boundary(h3index, geo_json=True))\n",
+ " for h3index in list(agg_point_gdf[\"h3index\"])\n",
+ "]\n",
+ "# agg_point_gdf.to_file('../../datasets/raw/methodology_results/test_agg_point_h3_res6_v2.shp')\n",
"agg_point_gdf.head()"
]
},
@@ -1210,10 +1264,10 @@
}
],
"source": [
- "#obtain deforestation that takes places in that hexagon\n",
- "h3index_list = list(agg_point_gdf['h3index'])\n",
+ "# obtain deforestation that takes places in that hexagon\n",
+ "h3index_list = list(agg_point_gdf[\"h3index\"])\n",
"\n",
- "def_agg_loc = gdf_merge[gdf_merge['h3index'].isin(h3index_list)]\n",
+ "def_agg_loc = gdf_merge[gdf_merge[\"h3index\"].isin(h3index_list)]\n",
"def_agg_loc.head()"
]
},
@@ -1235,19 +1289,24 @@
}
],
"source": [
- "#asumming volume equal to 1T\n",
- "land_impact_agg_point = 1000*def_agg_loc['harvst_ha'].sum()/def_agg_loc['prod_t'].sum()\n",
- "print(f'land impact: {land_impact_agg_point} ha')\n",
- "\n",
- "def_if = sum((def_agg_loc['kernel_def_ha'] * def_agg_loc['prod_t']).dropna()) / def_agg_loc['prod_t'].sum()\n",
- "print(f'Dif: {def_if}')\n",
- "\n",
- "#Weighted mean total cropland area per pixel\n",
- "def_total_cropland_area_per_pixel = (def_agg_loc['harvst_all_ha'] * def_agg_loc['prod_t']).dropna().sum() /def_agg_loc['prod_t'].sum()\n",
- "print(f'Mean cropland area: {def_total_cropland_area_per_pixel}')\n",
+ "# asumming volume equal to 1T\n",
+ "land_impact_agg_point = 1000 * def_agg_loc[\"harvst_ha\"].sum() / def_agg_loc[\"prod_t\"].sum()\n",
+ "print(f\"land impact: {land_impact_agg_point} ha\")\n",
+ "\n",
+ "def_if = (\n",
+ " sum((def_agg_loc[\"kernel_def_ha\"] * def_agg_loc[\"prod_t\"]).dropna())\n",
+ " / def_agg_loc[\"prod_t\"].sum()\n",
+ ")\n",
+ "print(f\"Dif: {def_if}\")\n",
+ "\n",
+ "# Weighted mean total cropland area per pixel\n",
+ "def_total_cropland_area_per_pixel = (\n",
+ " def_agg_loc[\"harvst_all_ha\"] * def_agg_loc[\"prod_t\"]\n",
+ ").dropna().sum() / def_agg_loc[\"prod_t\"].sum()\n",
+ "print(f\"Mean cropland area: {def_total_cropland_area_per_pixel}\")\n",
"\n",
"def_impact_agg_2 = (def_if * land_impact_agg_point) / def_total_cropland_area_per_pixel\n",
- "print(f'Revised forest loss risk:{def_impact_agg_2} ha')\n"
+ "print(f\"Revised forest loss risk:{def_impact_agg_2} ha\")"
]
},
{
@@ -1273,19 +1332,23 @@
"evalue": "name 'def_impact_agg' is not defined",
"output_type": "error",
"traceback": [
- "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
- "\u001B[0;31mNameError\u001B[0m Traceback (most recent call last)",
- "\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[0;31m## map - land impact aggregation point:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 2\u001B[0m \u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'land_impact_ha'\u001B[0m\u001B[0;34m]\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m(\u001B[0m\u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'prod_t'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m*\u001B[0m\u001B[0mland_impact_agg_point\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;34m/\u001B[0m \u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'prod_t'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msum\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 3\u001B[0;31m \u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'def_impact_ha'\u001B[0m\u001B[0;34m]\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m(\u001B[0m\u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'prod_t'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m*\u001B[0m\u001B[0mdef_impact_agg\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;34m/\u001B[0m \u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'prod_t'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msum\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 4\u001B[0m \u001B[0mdef_agg_loc\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mdef_agg_loc\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mset_geometry\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'geometry'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 5\u001B[0m \u001B[0;31m#def_agg_loc.to_file('../../datasets/raw/methodology_results/update/agg_point_h3_res6_impact_v1_kernel.shp')\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
- "\u001B[0;31mNameError\u001B[0m: name 'def_impact_agg' is not defined"
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m## map - land impact aggregation point:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'land_impact_ha'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'prod_t'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mland_impact_agg_point\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'prod_t'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'def_impact_ha'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'prod_t'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mdef_impact_agg\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'prod_t'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mdef_agg_loc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdef_agg_loc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_geometry\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'geometry'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m#def_agg_loc.to_file('../../datasets/raw/methodology_results/update/agg_point_h3_res6_impact_v1_kernel.shp')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mNameError\u001b[0m: name 'def_impact_agg' is not defined"
]
}
],
"source": [
"## map - land impact aggregation point:\n",
- "def_agg_loc['land_impact_ha'] = (def_agg_loc['prod_t']*land_impact_agg_point) / def_agg_loc['prod_t'].sum()\n",
- "def_agg_loc['def_impact_ha'] = (def_agg_loc['prod_t']*def_impact_agg) / def_agg_loc['prod_t'].sum()\n",
- "def_agg_loc = def_agg_loc.set_geometry('geometry')\n",
- "#def_agg_loc.to_file('../../datasets/raw/methodology_results/update/agg_point_h3_res6_impact_v1_kernel.shp')\n",
+ "def_agg_loc[\"land_impact_ha\"] = (def_agg_loc[\"prod_t\"] * land_impact_agg_point) / def_agg_loc[\n",
+ " \"prod_t\"\n",
+ "].sum()\n",
+ "def_agg_loc[\"def_impact_ha\"] = (def_agg_loc[\"prod_t\"] * def_impact_agg) / def_agg_loc[\n",
+ " \"prod_t\"\n",
+ "].sum()\n",
+ "def_agg_loc = def_agg_loc.set_geometry(\"geometry\")\n",
+ "# def_agg_loc.to_file('../../datasets/raw/methodology_results/update/agg_point_h3_res6_impact_v1_kernel.shp')\n",
"\n",
"def_agg_loc.head()"
]
@@ -1378,21 +1441,25 @@
}
],
"source": [
- "adm_loc = gpd.read_file('../../datasets/raw/methodology_results/aceh_loc.geojson')\n",
+ "adm_loc = gpd.read_file(\"../../datasets/raw/methodology_results/aceh_loc.geojson\")\n",
"adm_loc = adm_loc.explode(index_parts=True)\n",
- "h3_multipol = [h3.polyfill(geom.__geo_interface__, 6, geo_json_conformant = True) for geom in list(adm_loc['geometry'])]\n",
+ "h3_multipol = [\n",
+ " h3.polyfill(geom.__geo_interface__, 6, geo_json_conformant=True)\n",
+ " for geom in list(adm_loc[\"geometry\"])\n",
+ "]\n",
"\n",
- "for i in range(0,len(h3_multipol)):\n",
+ "for i in range(0, len(h3_multipol)):\n",
" if i == 0:\n",
" df_mult = pd.DataFrame(h3_multipol[i])\n",
" else:\n",
- " \n",
" df_ = pd.DataFrame(h3_multipol[i])\n",
" df_mult = pd.concat([df_mult, df_])\n",
- "df_mult = df_mult.rename(columns={0:'h3index'})\n",
- "df_mult['geometry'] = [Polygon(h3.h3_to_geo_boundary(h3index, geo_json=True)) for h3index in list(df_mult['h3index'])]\n",
- "df_mult = df_mult.set_geometry('geometry')\n",
- "#df_mult.to_file('../../datasets/raw/methodology_results/test_aceh_h3_res6.shp')\n",
+ "df_mult = df_mult.rename(columns={0: \"h3index\"})\n",
+ "df_mult[\"geometry\"] = [\n",
+ " Polygon(h3.h3_to_geo_boundary(h3index, geo_json=True)) for h3index in list(df_mult[\"h3index\"])\n",
+ "]\n",
+ "df_mult = df_mult.set_geometry(\"geometry\")\n",
+ "# df_mult.to_file('../../datasets/raw/methodology_results/test_aceh_h3_res6.shp')\n",
"df_mult.head()"
]
},
@@ -1547,10 +1614,10 @@
}
],
"source": [
- "#obtain deforestation that takes places in that hexagon\n",
- "h3index_list = list(df_mult['h3index'])\n",
+ "# obtain deforestation that takes places in that hexagon\n",
+ "h3index_list = list(df_mult[\"h3index\"])\n",
"\n",
- "def_aceh = gdf_merge[gdf_merge['h3index'].isin(h3index_list)]\n",
+ "def_aceh = gdf_merge[gdf_merge[\"h3index\"].isin(h3index_list)]\n",
"def_aceh.head()"
]
},
@@ -1571,19 +1638,21 @@
}
],
"source": [
- "#asumming volume equal to 1T\n",
- "land_impact_aceh = 1000*def_aceh['harvst_ha'].sum()/def_aceh['prod_t'].sum()\n",
- "print(f'land impact: {land_impact_aceh} ha')\n",
+ "# asumming volume equal to 1T\n",
+ "land_impact_aceh = 1000 * def_aceh[\"harvst_ha\"].sum() / def_aceh[\"prod_t\"].sum()\n",
+ "print(f\"land impact: {land_impact_aceh} ha\")\n",
"\n",
- "def_if = (def_aceh['kernel_def_ha'] * def_aceh['prod_t']).dropna().sum() /def_aceh['prod_t'].sum()\n",
- "print(f'Dif: {def_if}')\n",
+ "def_if = (def_aceh[\"kernel_def_ha\"] * def_aceh[\"prod_t\"]).dropna().sum() / def_aceh[\"prod_t\"].sum()\n",
+ "print(f\"Dif: {def_if}\")\n",
"\n",
- "#Weighted mean total cropland area per pixel\n",
- "def_total_cropland_area_per_pixel = (def_aceh['harvst_all_ha'] * def_aceh['prod_t']).dropna().sum() /def_aceh['prod_t'].sum()\n",
- "print(f'Mean cropland area: {def_total_cropland_area_per_pixel}')\n",
+ "# Weighted mean total cropland area per pixel\n",
+ "def_total_cropland_area_per_pixel = (\n",
+ " def_aceh[\"harvst_all_ha\"] * def_aceh[\"prod_t\"]\n",
+ ").dropna().sum() / def_aceh[\"prod_t\"].sum()\n",
+ "print(f\"Mean cropland area: {def_total_cropland_area_per_pixel}\")\n",
"\n",
"def_impact_aceh_2 = (def_if * land_impact_aceh) / def_total_cropland_area_per_pixel\n",
- "print(f'Revised forest loss risk:{def_impact_aceh_2} ha')\n"
+ "print(f\"Revised forest loss risk:{def_impact_aceh_2} ha\")"
]
},
{
@@ -1763,10 +1832,10 @@
}
],
"source": [
- "def_aceh['land_impact_ha'] = (def_aceh['prod_t']*land_impact_aceh) / def_aceh['prod_t'].sum()\n",
- "def_aceh['def_impact_ha'] = (def_aceh['prod_t']*def_impact_aceh) / def_aceh['prod_t'].sum()\n",
- "def_aceh = def_aceh.set_geometry('geometry')\n",
- "def_aceh.to_file('../../datasets/raw/methodology_results/update/Aceh_h3_res6_impact_v1_kernel.shp')\n",
+ "def_aceh[\"land_impact_ha\"] = (def_aceh[\"prod_t\"] * land_impact_aceh) / def_aceh[\"prod_t\"].sum()\n",
+ "def_aceh[\"def_impact_ha\"] = (def_aceh[\"prod_t\"] * def_impact_aceh) / def_aceh[\"prod_t\"].sum()\n",
+ "def_aceh = def_aceh.set_geometry(\"geometry\")\n",
+ "def_aceh.to_file(\"../../datasets/raw/methodology_results/update/Aceh_h3_res6_impact_v1_kernel.shp\")\n",
"\n",
"def_aceh.head()"
]
diff --git a/data/notebooks/Lab/11_woodpulp_and_sateligence_preprocessing.ipynb b/data/notebooks/Lab/11_woodpulp_and_sateligence_preprocessing.ipynb
index bb2d186a3..2fa88fdc6 100644
--- a/data/notebooks/Lab/11_woodpulp_and_sateligence_preprocessing.ipynb
+++ b/data/notebooks/Lab/11_woodpulp_and_sateligence_preprocessing.ipynb
@@ -9,22 +9,15 @@
},
"outputs": [],
"source": [
- "from collections import namedtuple\n",
- "import math\n",
- "import os\n",
"from pathlib import Path\n",
"\n",
- "\n",
- "import numpy as np\n",
"import rasterio as rio\n",
"import rioxarray\n",
"from affine import Affine\n",
- "from h3ronpy import raster\n",
"from h3ronpy.raster import nearest_h3_resolution, raster_to_dataframe\n",
"from rasterio.coords import BoundingBox\n",
"from rasterio.enums import Resampling\n",
- "from rasterio.plot import show\n",
- "from shapely.geometry import Polygon"
+ "from rasterio.plot import show"
]
},
{
@@ -106,14 +99,21 @@
"metadata": {},
"outputs": [],
"source": [
- "def find_h3_res_best_fit(transform: Affine, shape: tuple[int, int], bounds: BoundingBox, resolution: int) -> list:\n",
+ "def find_h3_res_best_fit(\n",
+ " transform: Affine, shape: tuple[int, int], bounds: BoundingBox, resolution: int\n",
+ ") -> list:\n",
" result = []\n",
" for scale_factor in (x for x in range(1, 400)):\n",
" x_pix_size = transform.a * scale_factor\n",
" y_pix_size = transform.e * scale_factor\n",
"\n",
- " shape = (int((bounds.right - bounds.left) / x_pix_size), int((bounds.bottom - bounds.top) / y_pix_size))\n",
- " new_trans = Affine(x_pix_size, transform.b, transform.c, transform.d, y_pix_size, transform.f)\n",
+ " shape = (\n",
+ " int((bounds.right - bounds.left) / x_pix_size),\n",
+ " int((bounds.bottom - bounds.top) / y_pix_size),\n",
+ " )\n",
+ " new_trans = Affine(\n",
+ " x_pix_size, transform.b, transform.c, transform.d, y_pix_size, transform.f\n",
+ " )\n",
"\n",
" h3_res = nearest_h3_resolution(shape, new_trans, search_mode=\"min_diff\")\n",
" result.append((scale_factor, x_pix_size, shape, h3_res))\n",
@@ -601,10 +601,17 @@
}
],
"source": [
- "with rio.open(\"../../h3_data_importer/data/woodpulp/gfw_plantations_woodpulp_harvest_ha_res.tif\") as src:\n",
+ "with rio.open(\n",
+ " \"../../h3_data_importer/data/woodpulp/gfw_plantations_woodpulp_harvest_ha_res.tif\"\n",
+ ") as src:\n",
" show(src, interpolation=\"none\")\n",
" df = raster_to_dataframe(\n",
- " src.read(1), src.transform, h3_resolution=6, nodata_value=src.nodata, compacted=False, geo=True\n",
+ " src.read(1),\n",
+ " src.transform,\n",
+ " h3_resolution=6,\n",
+ " nodata_value=src.nodata,\n",
+ " compacted=False,\n",
+ " geo=True,\n",
" )"
]
},
@@ -654,7 +661,10 @@
"metadata": {},
"outputs": [],
"source": [
- "df.to_file(\"../../h3_data_importer/data/woodpulp/gfw_plantations_woodpulp_harvest_ha_h3.geojson\", driver=\"GeoJSON\")"
+ "df.to_file(\n",
+ " \"../../h3_data_importer/data/woodpulp/gfw_plantations_woodpulp_harvest_ha_h3.geojson\",\n",
+ " driver=\"GeoJSON\",\n",
+ ")"
]
},
{
@@ -684,7 +694,9 @@
"metadata": {},
"outputs": [],
"source": [
- "with rio.open(\"../../h3_data_importer/data/satelligence/Deforestation_Masked_2016-2022-10-01.tif\") as src:\n",
+ "with rio.open(\n",
+ " \"../../h3_data_importer/data/satelligence/Deforestation_Masked_2016-2022-10-01.tif\"\n",
+ ") as src:\n",
" target = find_h3_res_best_fit(src.transform, src.shape, src.bounds, 6)"
]
},
@@ -1236,7 +1248,9 @@
}
],
"source": [
- "deforest_risk = deforest_risk.rio.reproject(\"EPSG:4326\", resolution=(0.00075, 0.00075), resampling=Resampling.sum)\n",
+ "deforest_risk = deforest_risk.rio.reproject(\n",
+ " \"EPSG:4326\", resolution=(0.00075, 0.00075), resampling=Resampling.sum\n",
+ ")\n",
"deforest_risk"
]
},
@@ -1247,7 +1261,7 @@
"metadata": {},
"outputs": [],
"source": [
- "deforest_risk.rio.to_raster('../../h3_data_importer/data/satelligence/Deforestation_risk.tif')"
+ "deforest_risk.rio.to_raster(\"../../h3_data_importer/data/satelligence/Deforestation_risk.tif\")"
]
},
{
diff --git a/data/notebooks/Lab/1_biodiversity_indicator.ipynb b/data/notebooks/Lab/1_biodiversity_indicator.ipynb
index e172e2df7..f1728ca7c 100644
--- a/data/notebooks/Lab/1_biodiversity_indicator.ipynb
+++ b/data/notebooks/Lab/1_biodiversity_indicator.ipynb
@@ -71,15 +71,14 @@
"outputs": [],
"source": [
"## import libraries\n",
+ "import time\n",
+ "\n",
"import geopandas as gpd\n",
+ "import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
- "\n",
"import rasterio as rio\n",
"import rasterio.plot\n",
- "import matplotlib.pyplot as plt\n",
- "from rasterstats import zonal_stats\n",
- "\n",
- "import time"
+ "from rasterstats import zonal_stats"
]
},
{
@@ -315,7 +314,11 @@
}
],
"source": [
- "df = pd.read_excel(r'../../datasets/raw/biodiversity_indicators/Ch6 PSLregional v01.xlsx', sheet_name='Transformation_Ecoregion', header=[3])\n",
+ "df = pd.read_excel(\n",
+ " r\"../../datasets/raw/biodiversity_indicators/Ch6 PSLregional v01.xlsx\",\n",
+ " sheet_name=\"Transformation_Ecoregion\",\n",
+ " header=[3],\n",
+ ")\n",
"df.head()"
]
},
@@ -408,7 +411,7 @@
],
"source": [
"## select for the moment annual crops\n",
- "pdf_annual_crops =df[['eco_code', 'Median', 'lower 95%', 'upper 95%']]\n",
+ "pdf_annual_crops = df[[\"eco_code\", \"Median\", \"lower 95%\", \"upper 95%\"]]\n",
"pdf_annual_crops.head()"
]
},
@@ -636,7 +639,7 @@
],
"source": [
"## import the ecoregions data\n",
- "ecoregions = gpd.read_file('../../datasets/raw/biodiversity_indicators/official/wwf_terr_ecos.shp')\n",
+ "ecoregions = gpd.read_file(\"../../datasets/raw/biodiversity_indicators/official/wwf_terr_ecos.shp\")\n",
"ecoregions.head()"
]
},
@@ -863,11 +866,7 @@
}
],
"source": [
- "ecoregions_PDF = pd.merge(\n",
- " pdf_annual_crops,\n",
- " ecoregions,\n",
- " how= 'inner',\n",
- " on='eco_code')\n",
+ "ecoregions_PDF = pd.merge(pdf_annual_crops, ecoregions, how=\"inner\", on=\"eco_code\")\n",
"ecoregions_PDF.head()"
]
},
@@ -878,7 +877,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ecoregions_PDF = ecoregions_PDF.set_geometry('geometry')"
+ "ecoregions_PDF = ecoregions_PDF.set_geometry(\"geometry\")"
]
},
{
@@ -890,8 +889,8 @@
"source": [
"# export\n",
"ecoregions_PDF.to_file(\n",
- " '../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors.shp',\n",
- " driver='ESRI Shapefile'\n",
+ " \"../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors.shp\",\n",
+ " driver=\"ESRI Shapefile\",\n",
")"
]
},
@@ -1011,15 +1010,24 @@
}
],
"source": [
- "#check calculated risk map\n",
- "with rio.open('../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326.tif') as src:\n",
+ "# check calculated risk map\n",
+ "with rio.open(\n",
+ " \"../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326.tif\"\n",
+ ") as src:\n",
" dat = src.read(1)\n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
- " #ax.set_ylim((-5,40))\n",
- " #ax.set_xlim((60,100))\n",
- " rio.plot.show(dat, vmin=2.8999999152285e-14, vmax=2.9376220100729e-12, cmap='Blues', ax=ax, transform=src.transform)\n",
- " #test_gdf.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n",
- " ax.set_title('Regional taxa aggregated characetrization factors by ecoregion (PDF/m2 *yr)')"
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
+ " # ax.set_ylim((-5,40))\n",
+ " # ax.set_xlim((60,100))\n",
+ " rio.plot.show(\n",
+ " dat,\n",
+ " vmin=2.8999999152285e-14,\n",
+ " vmax=2.9376220100729e-12,\n",
+ " cmap=\"Blues\",\n",
+ " ax=ax,\n",
+ " transform=src.transform,\n",
+ " )\n",
+ " # test_gdf.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n",
+ " ax.set_title(\"Regional taxa aggregated characetrization factors by ecoregion (PDF/m2 *yr)\")"
]
},
{
@@ -1056,7 +1064,7 @@
}
],
"source": [
- "#clip taxa aggregated characetrisation factors to deforestation extent \n",
+ "# clip taxa aggregated characetrisation factors to deforestation extent\n",
"!gdal_translate -projwin 100.024523992 2.91915145 103.814423992 -1.12192855 -of GTiff '../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326.tif' '../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326_clipped.tif'"
]
},
@@ -1091,8 +1099,10 @@
"metadata": {},
"outputs": [],
"source": [
- "taxa_cf_4326 = '../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326_30m.tif'\n",
- "deforestation_4326 = '../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018.tif'"
+ "taxa_cf_4326 = \"../../datasets/processed/biodiversity_indicators/taxa_aggregated_characterization_factors_4326_30m.tif\"\n",
+ "deforestation_4326 = (\n",
+ " \"../../datasets/processed/deforestation_indicators/deforestation_risk_ha_2018.tif\"\n",
+ ")"
]
},
{
@@ -1352,17 +1362,26 @@
}
],
"source": [
- "#generate a cog with the biodiversity risk map\n",
- "#check calculated risk map\n",
- "with rio.open('../../datasets/processed/biodiversity_indicators/biodiversity_risk_cotton_epsg4326_PDF.tif') as src:\n",
+ "# generate a cog with the biodiversity risk map\n",
+ "# check calculated risk map\n",
+ "with rio.open(\n",
+ " \"../../datasets/processed/biodiversity_indicators/biodiversity_risk_cotton_epsg4326_PDF.tif\"\n",
+ ") as src:\n",
" dat = src.read(1)\n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
- " #ax.set_ylim((-5,40))\n",
- " #ax.set_xlim((60,100))\n",
- " rio.plot.show(dat, vmin=2.8999999152285e-14, vmax=1.1684577784499e-11, cmap='Oranges', ax=ax, transform=src.transform)\n",
- " #test_gdf.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n",
- " ax.set_title('Biodiversity loss due to land use change risk map (PDF/yr)')\n",
- " \n",
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
+ " # ax.set_ylim((-5,40))\n",
+ " # ax.set_xlim((60,100))\n",
+ " rio.plot.show(\n",
+ " dat,\n",
+ " vmin=2.8999999152285e-14,\n",
+ " vmax=1.1684577784499e-11,\n",
+ " cmap=\"Oranges\",\n",
+ " ax=ax,\n",
+ " transform=src.transform,\n",
+ " )\n",
+ " # test_gdf.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n",
+ " ax.set_title(\"Biodiversity loss due to land use change risk map (PDF/yr)\")\n",
+ "\n",
" # Dark red shows no data information\n",
" # Beige shows 0 risk"
]
@@ -1529,8 +1548,8 @@
}
],
"source": [
- "#import test data and filter by commodity - cotton (as the deforestation risk is for cotton) and indonesia (as the sample data es for indonesia)\n",
- "gdf = gpd.read_file('../../datasets/processed/user_data/located_lg_data_polygon_v2.shp')\n",
+ "# import test data and filter by commodity - cotton (as the deforestation risk is for cotton) and indonesia (as the sample data es for indonesia)\n",
+ "gdf = gpd.read_file(\"../../datasets/processed/user_data/located_lg_data_polygon_v2.shp\")\n",
"gdf.head()"
]
},
@@ -1606,7 +1625,7 @@
],
"source": [
"# lest assume that the risk map is for rubber - we will need to update this later on\n",
- "gdf = gdf.loc[(gdf['Material']=='Rubber') & (gdf['Country']=='Indonesia')]\n",
+ "gdf = gdf.loc[(gdf[\"Material\"] == \"Rubber\") & (gdf[\"Country\"] == \"Indonesia\")]\n",
"gdf"
]
},
@@ -1617,8 +1636,12 @@
"metadata": {},
"outputs": [],
"source": [
- "yield_rubber = '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare.tif'\n",
- "harvest_portion_rubber = '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction.tif'"
+ "yield_rubber = (\n",
+ " \"../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare.tif\"\n",
+ ")\n",
+ "harvest_portion_rubber = (\n",
+ " \"../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction.tif\"\n",
+ ")"
]
},
{
@@ -1628,9 +1651,11 @@
"metadata": {},
"outputs": [],
"source": [
- "#save test location\n",
- "gdf.to_file('../../datasets/raw/input_data_test/indonesia_test_shape.shp',\n",
- " driver='ESRI Shapefile',)"
+ "# save test location\n",
+ "gdf.to_file(\n",
+ " \"../../datasets/raw/input_data_test/indonesia_test_shape.shp\",\n",
+ " driver=\"ESRI Shapefile\",\n",
+ ")"
]
},
{
@@ -1668,7 +1693,6 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
"## add projection - same as the other ones for the calculations\n",
"!gdal_edit.py -a_srs EPSG:4326 '../../datasets/raw/input_data_test/indonesia_raster_volume.tif'"
]
@@ -1690,7 +1714,7 @@
],
"source": [
"## clip data to deforestation extent\n",
- "#clip harvest area fraction to deforestation extent \n",
+ "# clip harvest area fraction to deforestation extent\n",
"!gdal_translate -projwin 100.024523992 2.91915145 103.814423992 -1.12192855 -of GTiff '../../datasets/raw/input_data_test/indonesia_raster_volume.tif' '../../datasets/raw/input_data_test/indonesia_raster_volume_4326_clipped.tif'"
]
},
@@ -1713,7 +1737,7 @@
],
"source": [
"## downsample to deforestation resolution\n",
- "!gdalwarp -s_srs EPSG:4326 -tr 0.000269494417976 0.000269494417976 -r near -of GTiff '../../datasets/raw/input_data_test/indonesia_raster_volume_4326_clipped.tif' '../../datasets/raw/input_data_test/indonesia_raster_volume_4326_30m.tif'\n"
+ "!gdalwarp -s_srs EPSG:4326 -tr 0.000269494417976 0.000269494417976 -r near -of GTiff '../../datasets/raw/input_data_test/indonesia_raster_volume_4326_clipped.tif' '../../datasets/raw/input_data_test/indonesia_raster_volume_4326_30m.tif'"
]
},
{
@@ -1734,7 +1758,7 @@
}
],
"source": [
- "#reproject raster volume from epsg4326 to espg3857\n",
+ "# reproject raster volume from epsg4326 to espg3857\n",
"!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 30 30 -r near -of GTiff '../../datasets/raw/input_data_test/indonesia_raster_volume_4326_30m.tif' '../../datasets/raw/input_data_test/indonesia_raster_volume_3857_30m.tif'"
]
},
@@ -1762,7 +1786,7 @@
}
],
"source": [
- "#clip harvest area fraction to deforestation extent \n",
+ "# clip harvest area fraction to deforestation extent\n",
"!gdal_translate -projwin 100.024523992 2.91915145 103.814423992 -1.12192855 -of GTiff $harvest_portion_rubber '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_4326_clipped.tif'"
]
},
@@ -1784,7 +1808,7 @@
"source": [
"## downsample harvest area fraction -as it's area independent we can downsample the values into smaller pixel sizes\n",
"# downsample the clipped data\n",
- "!gdalwarp -s_srs EPSG:4326 -tr 0.000269494417976 0.000269494417976 -r near -of GTiff '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_4326_clipped.tif' '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_4326_30m.tif'\n"
+ "!gdalwarp -s_srs EPSG:4326 -tr 0.000269494417976 0.000269494417976 -r near -of GTiff '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_4326_clipped.tif' '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_4326_30m.tif'"
]
},
{
@@ -1939,7 +1963,7 @@
}
],
"source": [
- "#generate raster with pixel area raster\n",
+ "# generate raster with pixel area raster\n",
"# reclasifies the raster into 0 and pixel area being the pixel area just on thise locations with harvest area fraction\n",
"!gdal_calc.py -A '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_3857_30m.tif' --outfile='../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/pixel_area_rubber_raster_epsg3857.tif' --calc=\"(A > 0) * (30*30)\""
]
@@ -2007,7 +2031,7 @@
],
"source": [
"gdf = gdf.set_crs(\"EPSG:4326\")\n",
- "print(f'projection of user data is: {gdf.crs}')"
+ "print(f\"projection of user data is: {gdf.crs}\")"
]
},
{
@@ -2017,8 +2041,8 @@
"metadata": {},
"outputs": [],
"source": [
- "#reproject the gdf to epsg3857 for the zonal statistics\n",
- "#reproject to epsg3857\n",
+ "# reproject the gdf to epsg3857 for the zonal statistics\n",
+ "# reproject to epsg3857\n",
"gdf = gdf.to_crs(\"EPSG:3857\")"
]
},
@@ -2029,7 +2053,7 @@
"metadata": {},
"outputs": [],
"source": [
- "gdf.to_file('../../datasets/processed/user_data/indonesia_test_3857.shp')"
+ "gdf.to_file(\"../../datasets/processed/user_data/indonesia_test_3857.shp\")"
]
},
{
@@ -2137,7 +2161,7 @@
}
],
"source": [
- "gdf = gpd.read_file('../../datasets/raw/input_data_test/indonesia_test_shape_clip.shp')\n",
+ "gdf = gpd.read_file(\"../../datasets/raw/input_data_test/indonesia_test_shape_clip.shp\")\n",
"gdf"
]
},
@@ -2156,13 +2180,12 @@
}
],
"source": [
- "#zonal stats in india to get the sum of all fraction harvest area\n",
- "total_harves_area_rubber = '../../datasets/raw/probability_map/area_total_rubber_raster_epsg3857.tif'\n",
+ "# zonal stats in india to get the sum of all fraction harvest area\n",
+ "total_harves_area_rubber = (\n",
+ " \"../../datasets/raw/probability_map/area_total_rubber_raster_epsg3857.tif\"\n",
+ ")\n",
"start_time = time.time()\n",
- "zs_indonesia_test = zonal_stats(\n",
- " gdf,\n",
- " total_harves_area_rubber,\n",
- " stats=\"sum\")\n",
+ "zs_indonesia_test = zonal_stats(gdf, total_harves_area_rubber, stats=\"sum\")\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -2181,7 +2204,7 @@
}
],
"source": [
- "print(f' The total rubber harvest area in indonessua is :', {zs_indonesia_test[0]['sum']}, 'm2')"
+ "print(\" The total rubber harvest area in indonessua is :\", {zs_indonesia_test[0][\"sum\"]}, \"m2\")"
]
},
{
@@ -2263,7 +2286,7 @@
],
"source": [
"## ad field to gdf\n",
- "gdf['Total_af'] = zs_indonesia_test[0]['sum']\n",
+ "gdf[\"Total_af\"] = zs_indonesia_test[0][\"sum\"]\n",
"gdf"
]
},
@@ -2274,7 +2297,7 @@
"metadata": {},
"outputs": [],
"source": [
- "gdf.to_file('../../datasets/processed/user_data/indonesia_test_3857.shp')"
+ "gdf.to_file(\"../../datasets/processed/user_data/indonesia_test_3857.shp\")"
]
},
{
@@ -2293,7 +2316,7 @@
],
"source": [
"## generate a raster with same extent as the other ones with this total area fraction value\n",
- "!gdal_rasterize -l indonesia_test_3857 -a Total_af -tr 30 30 -a_nodata 0.0 -ot Float32 -of GTiff '../../datasets/processed/user_data/indonesia_test_3857.shp' '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af.tif'\n"
+ "!gdal_rasterize -l indonesia_test_3857 -a Total_af -tr 30 30 -a_nodata 0.0 -ot Float32 -of GTiff '../../datasets/processed/user_data/indonesia_test_3857.shp' '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af.tif'"
]
},
{
@@ -2331,7 +2354,7 @@
}
],
"source": [
- "#clip harvest area fraction to deforestation extent \n",
+ "# clip harvest area fraction to deforestation extent\n",
"!gdal_translate -projwin 100.024523992 2.91915145 103.814423992 -1.12192855 -of GTiff '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare.tif' '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_4326_clipped.tif'"
]
},
@@ -2353,7 +2376,7 @@
"source": [
"## downsample harvest area fraction -as it's area independent we can downsample the values into smaller pixel sizes\n",
"# downsample the clipped data\n",
- "!gdalwarp -s_srs EPSG:4326 -tr 0.000269494417976 0.000269494417976 -r near -of GTiff '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_4326_clipped.tif' '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_4326_30m.tif'\n"
+ "!gdalwarp -s_srs EPSG:4326 -tr 0.000269494417976 0.000269494417976 -r near -of GTiff '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_4326_clipped.tif' '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_4326_30m.tif'"
]
},
{
@@ -2372,8 +2395,8 @@
}
],
"source": [
- "#reproject yield from epsg4326 to epsg3857\n",
- "#reproject raster volume from epsg4326 to espg3857\n",
+ "# reproject yield from epsg4326 to epsg3857\n",
+ "# reproject raster volume from epsg4326 to espg3857\n",
"!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 30 30 -r near -of GTiff '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_4326_30m.tif' '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_3857_30m.tif'"
]
},
@@ -2639,8 +2662,8 @@
}
],
"source": [
- "#fix extent od total area fraction raster\n",
- "!gdal_translate -projwin 11131949.079 334111.171 11549399.079 -111328.829 -of GTiff '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af.tif' '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af_new_extent.tif'\n"
+ "# fix extent od total area fraction raster\n",
+ "!gdal_translate -projwin 11131949.079 334111.171 11549399.079 -111328.829 -of GTiff '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af.tif' '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af_new_extent.tif'"
]
},
{
@@ -2774,7 +2797,7 @@
}
],
"source": [
- "#generate raster with pixel area raster\n",
+ "# generate raster with pixel area raster\n",
"# reclasifies the raster into 0 and pixel area being the pixel area just on thise locations with harvest area fraction\n",
"!gdal_calc.py -A '../../datasets/raw/input_data_test/indonesia_raster_volume_3857_30m.tif' -B '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_HarvestedAreaFraction_3857_30m.tif' -C '../../datasets/raw/probability_map/indonesia_rubber_raster_total_af_new_extent.tif' -D '../../datasets/raw/crop_data/rubber_HarvAreaYield_Geotiff/rubber_YieldPerHectare_3857_30m.tif' --outfile='../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif' --calc=\"(A*B)/(C*D)\""
]
@@ -2887,13 +2910,22 @@
}
],
"source": [
- "with rio.open('../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif') as src:\n",
+ "with rio.open(\n",
+ " \"../../datasets/processed/probability_map/purchase_area_distribution_rubber_indonesia_3857.tif\"\n",
+ ") as src:\n",
" image_array = src.read(1)\n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
- " ax.set_ylim((-111328.8286,334111.1714))\n",
- " ax.set_xlim((1.113195e+07,1.154940e+07))\n",
- " rio.plot.show(image_array, vmin=7.6509659718837e-11, vmax=3.2353862778438e-08, cmap='Oranges', ax=ax, transform=src.transform)\n",
- " ax.set_title('Geospatial responsibility - indonesia test')"
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
+ " ax.set_ylim((-111328.8286, 334111.1714))\n",
+ " ax.set_xlim((1.113195e07, 1.154940e07))\n",
+ " rio.plot.show(\n",
+ " image_array,\n",
+ " vmin=7.6509659718837e-11,\n",
+ " vmax=3.2353862778438e-08,\n",
+ " cmap=\"Oranges\",\n",
+ " ax=ax,\n",
+ " transform=src.transform,\n",
+ " )\n",
+ " ax.set_title(\"Geospatial responsibility - indonesia test\")"
]
},
{
@@ -3072,8 +3104,8 @@
}
],
"source": [
- "#reproject biodiversity risk map from epsg4326 to epsg3857\n",
- "#reproject raster volume from epsg4326 to espg3857\n",
+ "# reproject biodiversity risk map from epsg4326 to epsg3857\n",
+ "# reproject raster volume from epsg4326 to espg3857\n",
"!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 30 30 -r near -of GTiff '../../datasets/processed/biodiversity_indicators/biodiversity_risk_cotton_epsg4326_PDF.tif' '../../datasets/processed/biodiversity_indicators/biodiversity_risk_cotton_epsg3857_PDF.tif'"
]
},
@@ -3231,13 +3263,17 @@
}
],
"source": [
- "with rio.open('../../datasets/processed/biodiversity_indicators/biodiversity_loss_dueTo_landusechange_3857_30m.tif') as src:\n",
+ "with rio.open(\n",
+ " \"../../datasets/processed/biodiversity_indicators/biodiversity_loss_dueTo_landusechange_3857_30m.tif\"\n",
+ ") as src:\n",
" image_array = src.read(1)\n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
- " ax.set_ylim((-111328.8286,334111.1714))\n",
- " ax.set_xlim((1.113195e+07,1.154940e+07))\n",
- " rio.plot.show(image_array, vmin=0, vmax=3.6318996466515e+28, cmap='Oranges', ax=ax, transform=src.transform)\n",
- " ax.set_title('BIodiversity impact - indonesia test')"
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
+ " ax.set_ylim((-111328.8286, 334111.1714))\n",
+ " ax.set_xlim((1.113195e07, 1.154940e07))\n",
+ " rio.plot.show(\n",
+ " image_array, vmin=0, vmax=3.6318996466515e28, cmap=\"Oranges\", ax=ax, transform=src.transform\n",
+ " )\n",
+ " ax.set_title(\"BIodiversity impact - indonesia test\")"
]
},
{
diff --git a/data/notebooks/Lab/2_water_use.ipynb b/data/notebooks/Lab/2_water_use.ipynb
index 77094cb66..dab4473f5 100644
--- a/data/notebooks/Lab/2_water_use.ipynb
+++ b/data/notebooks/Lab/2_water_use.ipynb
@@ -81,32 +81,16 @@
"metadata": {},
"outputs": [],
"source": [
+ "import time\n",
+ "\n",
"import geopandas as gpd\n",
- "import pandas as pd\n",
- "from shapely.geometry import Point\n",
+ "import matplotlib.pyplot as plt\n",
"import rasterio as rio\n",
"import rasterio.plot\n",
- "import matplotlib.pyplot as plt\n",
"from rasterio.plot import show_hist\n",
- "import time\n",
- "from rasterstats import gen_zonal_stats, gen_point_query\n",
- "from shapely.geometry import shape, mapping\n",
- "import folium\n",
- "from rasterstats import gen_zonal_stats, gen_point_query\n",
- "import h3\n",
"from rasterstats import zonal_stats"
]
},
- {
- "cell_type": "code",
- "execution_count": 87,
- "id": "1ef1fd14",
- "metadata": {},
- "outputs": [],
- "source": [
- "from processing.geolocating_data import GeolocateAddress"
- ]
- },
{
"cell_type": "markdown",
"id": "baecd78d",
@@ -138,8 +122,10 @@
"metadata": {},
"outputs": [],
"source": [
- "blwf_path = '../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/hdr.adf'\n",
- "ha_fraction_path = '../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857.tif'"
+ "blwf_path = (\n",
+ " \"../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/hdr.adf\"\n",
+ ")\n",
+ "ha_fraction_path = \"../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857.tif\""
]
},
{
@@ -240,7 +226,7 @@
}
],
"source": [
- "#explore datasets info for calculation - the three raster need to have the same extent and projection\n",
+ "# explore datasets info for calculation - the three raster need to have the same extent and projection\n",
"!gdalinfo $blwf_path"
]
},
@@ -481,7 +467,7 @@
],
"source": [
"pixel_area = 12051.131160772874864 * 12051.131160772874864\n",
- "print(f'The pixel area of the reprojected raster is: {pixel_area} m2')"
+ "print(f\"The pixel area of the reprojected raster is: {pixel_area} m2\")"
]
},
{
@@ -500,7 +486,7 @@
],
"source": [
"# renormalised back by the pixel area\n",
- "!gdal_calc.py -A '../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/wfbl_cotton_epsg3857.tif' --outfile='../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/wfbl_cotton_epsg3857_normalised.tif' --calc=\"A/145229762.254151\"\n"
+ "!gdal_calc.py -A '../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/wfbl_cotton_epsg3857.tif' --outfile='../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/wfbl_cotton_epsg3857_normalised.tif' --calc=\"A/145229762.254151\""
]
},
{
@@ -617,7 +603,7 @@
],
"source": [
"pixel_area = 0.083333333333333 * 0.083333333333333\n",
- "print(f'Pixel area in degrees: {pixel_area}')"
+ "print(f\"Pixel area in degrees: {pixel_area}\")"
]
},
{
@@ -868,8 +854,8 @@
}
],
"source": [
- "# explore gdal info for nearest raster \n",
- "!gdalinfo -stats -hist '../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/test_reprojections/wfbl_cotton_epsg3857_near.tif'\n"
+ "# explore gdal info for nearest raster\n",
+ "!gdalinfo -stats -hist '../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/test_reprojections/wfbl_cotton_epsg3857_near.tif'"
]
},
{
@@ -1020,8 +1006,10 @@
"metadata": {},
"outputs": [],
"source": [
- "blwf_cotton = '../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/test_reprojections/wfbl_cotton_epsg3857_near.tif'\n",
- "harvest_area_portion = '../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857_new_extent.tif'"
+ "blwf_cotton = \"../../datasets/raw/water_indicators/Report47-App-IV-RasterMaps/Cotton/wfbl_mmyr/test_reprojections/wfbl_cotton_epsg3857_near.tif\"\n",
+ "harvest_area_portion = (\n",
+ " \"../../datasets/raw/crop_data/cotton_HarvestedAreaFraction_epsg3857_new_extent.tif\"\n",
+ ")"
]
},
{
@@ -1150,14 +1138,21 @@
}
],
"source": [
- "with rio.open('../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v3.tif') as src:\n",
+ "with rio.open(\"../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v3.tif\") as src:\n",
" image_array = src.read(1)\n",
" msk = src.read_masks()\n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
- " rio.plot.show(image_array, vmin=4.4836601744862e-05, vmax=0.14, cmap='Reds' , ax=ax, transform=src.transform)\n",
- " ax.set_title('Geospatial responsibility - test location')\n",
- " \n",
- " #the dark red shows no data"
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
+ " rio.plot.show(\n",
+ " image_array,\n",
+ " vmin=4.4836601744862e-05,\n",
+ " vmax=0.14,\n",
+ " cmap=\"Reds\",\n",
+ " ax=ax,\n",
+ " transform=src.transform,\n",
+ " )\n",
+ " ax.set_title(\"Geospatial responsibility - test location\")\n",
+ "\n",
+ " # the dark red shows no data"
]
},
{
@@ -1181,8 +1176,10 @@
"metadata": {},
"outputs": [],
"source": [
- "water_risk = '../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v3.tif'\n",
- "probability_area ='../../datasets/processed/probability_map/purchase_area_distribution_cotton_epsg3857.tif'\n"
+ "water_risk = \"../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v3.tif\"\n",
+ "probability_area = (\n",
+ " \"../../datasets/processed/probability_map/purchase_area_distribution_cotton_epsg3857.tif\"\n",
+ ")"
]
},
{
@@ -1468,17 +1465,21 @@
}
],
"source": [
- "#check calculated risk map\n",
- "with rio.open( '../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857.tif') as src:\n",
+ "# check calculated risk map\n",
+ "with rio.open(\n",
+ " \"../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857.tif\"\n",
+ ") as src:\n",
" dat = src.read(1)\n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
- " ax.set_ylim((695174.093781,4.255931e+06))\n",
- " ax.set_xlim((7.582124e+06,1.084202e+07))\n",
- " rio.plot.show(dat, vmin=0, vmax=7.9023620167261e-09, cmap='Oranges', ax=ax, transform=src.transform)\n",
- " #gdf_india.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n",
- " ax.set_title('Unsustainable water use in India - test location')\n",
- " \n",
- " #dark red shows no data values"
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
+ " ax.set_ylim((695174.093781, 4.255931e06))\n",
+ " ax.set_xlim((7.582124e06, 1.084202e07))\n",
+ " rio.plot.show(\n",
+ " dat, vmin=0, vmax=7.9023620167261e-09, cmap=\"Oranges\", ax=ax, transform=src.transform\n",
+ " )\n",
+ " # gdf_india.plot(ax=ax, alpha=0.5, color='Orange', edgecolor='yellow')\n",
+ " ax.set_title(\"Unsustainable water use in India - test location\")\n",
+ "\n",
+ " # dark red shows no data values"
]
},
{
@@ -1502,11 +1503,14 @@
],
"source": [
"from rasterio.plot import show_hist\n",
- "with rio.open( '../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857.tif') as src:\n",
+ "\n",
+ "with rio.open(\n",
+ " \"../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857.tif\"\n",
+ ") as src:\n",
" dat = src.read(1)\n",
" show_hist(\n",
- " src, bins=10, lw=0, stacked=False, alpha=0.3,\n",
- " histtype='stepfilled', title=\"Histogram\")"
+ " src, bins=10, lw=0, stacked=False, alpha=0.3, histtype=\"stepfilled\", title=\"Histogram\"\n",
+ " )"
]
},
{
@@ -1582,7 +1586,7 @@
}
],
"source": [
- "test_location = gpd.read_file('../../datasets/raw/probability_map/test_location_epsg3857.shp')\n",
+ "test_location = gpd.read_file(\"../../datasets/raw/probability_map/test_location_epsg3857.shp\")\n",
"test_location"
]
},
@@ -1784,7 +1788,7 @@
"source": [
"## reproject raster to epsg3857\n",
"# reproject the blue water footprint from epsg4326 to epsg3857\n",
- "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 12051.131160772875 12051.131160772875 -r near -of GTiff '../../datasets/processed/water_indicators/water_risk_cotton_epsg4326.tif' '../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v4.tif'\n"
+ "!gdalwarp -s_srs EPSG:4326 -t_srs EPSG:3857 -tr 12051.131160772875 12051.131160772875 -r near -of GTiff '../../datasets/processed/water_indicators/water_risk_cotton_epsg4326.tif' '../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v4.tif'"
]
},
{
@@ -1820,7 +1824,7 @@
],
"source": [
"## calculate matric using new reprojected layer\n",
- "!gdal_calc.py -A '../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v4.tif' -B '../../datasets/processed/probability_map/purchase_area_distribution_cotton_epsg3857_new_extent.tif' --outfile='../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857_v2.tif' --calc=\"A*B\"\n"
+ "!gdal_calc.py -A '../../datasets/processed/water_indicators/water_risk_cotton_epsg3857_v4.tif' -B '../../datasets/processed/probability_map/purchase_area_distribution_cotton_epsg3857_new_extent.tif' --outfile='../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857_v2.tif' --calc=\"A*B\""
]
},
{
@@ -1847,14 +1851,15 @@
],
"source": [
"## calculate zonal statistics in test location\n",
- "water_metric_v1 = '../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857.tif'\n",
- "water_metric_v2 = '../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857_v2.tif'\n",
+ "water_metric_v1 = (\n",
+ " \"../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857.tif\"\n",
+ ")\n",
+ "water_metric_v2 = (\n",
+ " \"../../datasets/processed/water_indicators/water_impact_cotton_test_location_epsg3857_v2.tif\"\n",
+ ")\n",
"\n",
"start_time = time.time()\n",
- "zs_india_test_v1 = zonal_stats(\n",
- " test_location,\n",
- " water_metric_v1,\n",
- " stats=\"sum\")\n",
+ "zs_india_test_v1 = zonal_stats(test_location, water_metric_v1, stats=\"sum\")\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
@@ -1865,7 +1870,7 @@
"metadata": {},
"outputs": [],
"source": [
- "print(f' water impact v1:', {zs_india_test[0]['sum']}, 'm2')"
+ "print(\" water impact v1:\", {zs_india_test[0][\"sum\"]}, \"m2\")"
]
},
{
@@ -1884,10 +1889,7 @@
],
"source": [
"start_time = time.time()\n",
- "zs_india_test_v2 = zonal_stats(\n",
- " test_location,\n",
- " water_metric_v2,\n",
- " stats=\"sum\")\n",
+ "zs_india_test_v2 = zonal_stats(test_location, water_metric_v2, stats=\"sum\")\n",
"print(\"--- %s seconds ---\" % (time.time() - start_time))"
]
},
diff --git a/data/notebooks/Lab/3_deforestation_risk.ipynb b/data/notebooks/Lab/3_deforestation_risk.ipynb
index 8471f9716..9bf2cac56 100644
--- a/data/notebooks/Lab/3_deforestation_risk.ipynb
+++ b/data/notebooks/Lab/3_deforestation_risk.ipynb
@@ -34,30 +34,17 @@
"metadata": {},
"outputs": [],
"source": [
+ "import os\n",
+ "import time\n",
+ "\n",
"import geopandas as gpd\n",
+ "import h3\n",
+ "import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
- "from shapely.geometry import Point\n",
"import rasterio as rio\n",
"import rasterio.plot\n",
- "import matplotlib.pyplot as plt\n",
"from rasterio.plot import show_hist\n",
- "import time\n",
- "from rasterstats import gen_zonal_stats, gen_point_query\n",
- "from shapely.geometry import shape, mapping\n",
- "import folium\n",
- "from rasterstats import gen_zonal_stats, gen_point_query\n",
- "import h3\n",
- "import os\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "c0b4cbb2",
- "metadata": {},
- "outputs": [],
- "source": [
- "from processing.geolocating_data import GeolocateAddress"
+ "from rasterstats import gen_zonal_stats"
]
},
{
@@ -104,7 +91,7 @@
}
],
"source": [
- "input_path = '../../datasets/raw/satelligence_sample_data'\n",
+ "input_path = \"../../datasets/raw/satelligence_sample_data\"\n",
"\n",
"os.listdir(input_path)"
]
@@ -116,8 +103,8 @@
"metadata": {},
"outputs": [],
"source": [
- "baseline_path = input_path + '/forest_and_plantation_baseline'\n",
- "change_path = input_path + '/change_detection'"
+ "baseline_path = input_path + \"/forest_and_plantation_baseline\"\n",
+ "change_path = input_path + \"/change_detection\""
]
},
{
@@ -142,7 +129,7 @@
"source": [
"# Baseline Forest\n",
"\n",
- "files = [f\"/{f}\" for f in os.listdir(baseline_path) if '.tif' in f]\n",
+ "files = [f\"/{f}\" for f in os.listdir(baseline_path) if \".tif\" in f]\n",
"files"
]
},
@@ -209,7 +196,7 @@
"source": [
"file = baseline_path + files[1]\n",
"\n",
- "#explore datasets info for calculation - the three raster need to have the same extent and projection\n",
+ "# explore datasets info for calculation - the three raster need to have the same extent and projection\n",
"baseline_info = !gdalinfo $file\n",
"baseline_info"
]
@@ -222,7 +209,8 @@
"outputs": [],
"source": [
"from matplotlib.colors import ListedColormap\n",
- "custom_cmap = ListedColormap(['#ffffff','#9bff8f','#73a367'])"
+ "\n",
+ "custom_cmap = ListedColormap([\"#ffffff\", \"#9bff8f\", \"#73a367\"])"
]
},
{
@@ -243,20 +231,18 @@
}
],
"source": [
- "#check 2018 baseline\n",
+ "# check 2018 baseline\n",
"\n",
"# Legend\n",
- "# 0 Non forest \n",
+ "# 0 Non forest\n",
"# 1 Forest\n",
"# 2 Primary Forest\n",
"\n",
"with rio.open(file) as src:\n",
" image_array = src.read(1)\n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
" rio.plot.show(image_array, vmin=0, vmax=2, cmap=custom_cmap, ax=ax, transform=src.transform)\n",
- " ax.set_title('Baseline tree cover 2018')\n",
- " \n",
- " "
+ " ax.set_title(\"Baseline tree cover 2018\")"
]
},
{
@@ -570,7 +556,7 @@
"source": [
"file = baseline_path + files[2]\n",
"\n",
- "#explore datasets info for calculation - the three raster need to have the same extent and projection\n",
+ "# explore datasets info for calculation - the three raster need to have the same extent and projection\n",
"baseline_info = !gdalinfo $file\n",
"baseline_info"
]
@@ -593,9 +579,9 @@
}
],
"source": [
- "#check calculated risk map\n",
+ "# check calculated risk map\n",
"\n",
- "custom_cmap = ListedColormap([\"darkgreen\",\"#5eb342\",\"#3dd00d\",\"#ffd60e\",\"darkorange\"])\n",
+ "custom_cmap = ListedColormap([\"darkgreen\", \"#5eb342\", \"#3dd00d\", \"#ffd60e\", \"darkorange\"])\n",
"\n",
"# Legend\n",
"# 1 Primary Forest\n",
@@ -606,11 +592,9 @@
"\n",
"with rio.open(file) as src:\n",
" image_array = src.read(1)\n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
" rio.plot.show(image_array, vmin=0, vmax=5, cmap=custom_cmap, ax=ax, transform=src.transform)\n",
- " ax.set_title('Baseline tree cover 2019')\n",
- " \n",
- " "
+ " ax.set_title(\"Baseline tree cover 2019\")"
]
},
{
@@ -633,7 +617,7 @@
"source": [
"# Forest change\n",
"\n",
- "files = [f\"/{f}\" for f in os.listdir(change_path) if '.tif' in f]\n",
+ "files = [f\"/{f}\" for f in os.listdir(change_path) if \".tif\" in f]\n",
"files"
]
},
@@ -691,7 +675,7 @@
"source": [
"file = change_path + files[0]\n",
"\n",
- "#explore datasets info for calculation - the three raster need to have the same extent and projection\n",
+ "# explore datasets info for calculation - the three raster need to have the same extent and projection\n",
"change_info = !gdalinfo $file\n",
"change_info"
]
@@ -714,7 +698,7 @@
}
],
"source": [
- "#check calculated risk map\n",
+ "# check calculated risk map\n",
"\n",
"# Values in format YYYY-jjj e.g. 2019074 where jjj = julian day from 1-366\n",
"\n",
@@ -722,12 +706,10 @@
" image_array = src.read(1)\n",
" meta = src.meta\n",
" profile = src.profile\n",
- " \n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
- " rio.plot.show(image_array, vmin=0, vmax=5, cmap='Blues', ax=ax, transform=src.transform)\n",
- " ax.set_title('Change')\n",
- " \n",
- " "
+ "\n",
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
+ " rio.plot.show(image_array, vmin=0, vmax=5, cmap=\"Blues\", ax=ax, transform=src.transform)\n",
+ " ax.set_title(\"Change\")"
]
},
{
@@ -749,9 +731,7 @@
],
"source": [
"src = rio.open(file)\n",
- "show_hist(\n",
- " src, bins=50, lw=0.0, stacked=False, alpha=0.3,\n",
- " histtype='stepfilled', title=\"Histogram\")"
+ "show_hist(src, bins=50, lw=0.0, stacked=False, alpha=0.3, histtype=\"stepfilled\", title=\"Histogram\")"
]
},
{
@@ -819,9 +799,9 @@
"source": [
"custom_cmap = ListedColormap([\"white\", \"#f69\"])\n",
"\n",
- "fig, ax = plt.subplots(figsize=[15,10])\n",
+ "fig, ax = plt.subplots(figsize=[15, 10])\n",
"rio.plot.show(loss2018_array, vmin=0, vmax=1, cmap=custom_cmap, ax=ax, transform=src.transform)\n",
- "ax.set_title('Change')"
+ "ax.set_title(\"Change\")"
]
},
{
@@ -854,7 +834,7 @@
}
],
"source": [
- "output_path = '../../datasets/processed/'\n",
+ "output_path = \"../../datasets/processed/\"\n",
"\n",
"os.listdir(input_path)"
]
@@ -896,10 +876,10 @@
"outputs": [],
"source": [
"## Save as rio dataset\n",
- "defor_path = output_path + 'sat_loss_2018.tif'\n",
+ "defor_path = output_path + \"sat_loss_2018.tif\"\n",
"with rasterio.open(defor_path, \"w\", **profile) as dest:\n",
" dest.write(loss2018_array, 1)\n",
- " \n",
+ "\n",
"loss2018_array = None"
]
},
@@ -929,12 +909,10 @@
" image_array = src.read(1)\n",
" meta = src.meta\n",
" profile = src.profile\n",
- " \n",
- " fig, ax = plt.subplots(figsize=[15,10])\n",
+ "\n",
+ " fig, ax = plt.subplots(figsize=[15, 10])\n",
" rio.plot.show(image_array, vmin=0, vmax=1, cmap=custom_cmap, ax=ax, transform=src.transform)\n",
- " ax.set_title('Loss 2018')\n",
- " \n",
- " "
+ " ax.set_title(\"Loss 2018\")"
]
},
{
@@ -956,9 +934,7 @@
],
"source": [
"src = rio.open(defor_path)\n",
- "show_hist(\n",
- " src, bins=50, lw=0.0, stacked=False, alpha=0.3,\n",
- " histtype='stepfilled', title=\"Histogram\")"
+ "show_hist(src, bins=50, lw=0.0, stacked=False, alpha=0.3, histtype=\"stepfilled\", title=\"Histogram\")"
]
},
{
@@ -993,7 +969,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ha_fraction_path = '../../datasets/raw/cotton_HarvestedAreaFraction.tif'"
+ "ha_fraction_path = \"../../datasets/raw/cotton_HarvestedAreaFraction.tif\""
]
},
{
@@ -1074,7 +1050,7 @@
"## -of GTiff: geotiff we want to clip - change extent\n",
"# e.g. -of GTiff