From 6b12cabfad75b2087d62250b36bf21f89dd1ab50 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Tue, 29 Oct 2024 14:50:40 -0400 Subject: [PATCH 01/28] Add unify_projection option, partially hardcoded MVP impl. --- app/models/pydantic/creation_options.py | 7 +++ .../raster_tile_set_assets.py | 29 +++++++-- app/tasks/raster_tile_set_assets/utils.py | 30 +++++++++ batch/scripts/get_arguments.sh | 5 ++ batch/scripts/resample.sh | 2 +- batch/scripts/unify_projection.sh | 63 +++++++++++++++++++ 6 files changed, 131 insertions(+), 5 deletions(-) create mode 100644 batch/scripts/unify_projection.sh diff --git a/app/models/pydantic/creation_options.py b/app/models/pydantic/creation_options.py index 02a4a1f9d..961ff9010 100644 --- a/app/models/pydantic/creation_options.py +++ b/app/models/pydantic/creation_options.py @@ -115,6 +115,13 @@ class FieldType(StrictBaseModel): class RasterTileSetAssetCreationOptions(StrictBaseModel): + unify_projection: bool = Field( + False, + description=( + "First re-project to a common projection (EPSG:4326). Necessary " + "when input files are in different projections from each other." 
+ ) + ) pixel_meaning: str data_type: DataType nbits: Optional[int] diff --git a/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py b/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py index 102475a03..6e34b4563 100644 --- a/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py +++ b/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py @@ -18,9 +18,10 @@ from app.models.pydantic.geostore import FeatureCollection from app.models.pydantic.jobs import Job from app.models.pydantic.statistics import BandStats, Histogram, RasterStats +from app.settings.globals import DATA_LAKE_BUCKET from app.tasks import Callback, callback_constructor from app.tasks.batch import execute -from app.tasks.raster_tile_set_assets.utils import create_pixetl_job +from app.tasks.raster_tile_set_assets.utils import create_pixetl_job, create_unify_projection_job from app.utils.aws import get_s3_client from app.utils.path import ( get_asset_uri, @@ -67,13 +68,33 @@ async def raster_tile_set_asset( creation_options = PixETLCreationOptions(**co) + jobs: List[Job] = list() callback: Callback = callback_constructor(asset_id) - create_raster_tile_set_job: Job = await create_pixetl_job( - dataset, version, creation_options, "create_raster_tile_set", callback + if creation_options.unify_projection: + target_crs = "epsg:4326" + new_src_uris = list() + for i,_ in enumerate(creation_options.source_uri): + new_src_uris.append( + f"s3://{DATA_LAKE_BUCKET}/{dataset}/{version}/raster/{target_crs}/original/SRC_{i}" + ) + target_prefix = new_src_uris[0].rsplit("/", 1)[0] + jobs.append( + await create_unify_projection_job( + dataset, creation_options.source_uri, target_prefix, target_crs, "unify_projection", callback + ) + ) + + creation_options.source_uri = new_src_uris + + + jobs.append( + await create_pixetl_job( + dataset, version, creation_options, "create_raster_tile_set", callback + ) ) - log: ChangeLog = await execute([create_raster_tile_set_job]) + log: ChangeLog = await 
execute(jobs) return log diff --git a/app/tasks/raster_tile_set_assets/utils.py b/app/tasks/raster_tile_set_assets/utils.py index 94075b501..5fae9b21f 100644 --- a/app/tasks/raster_tile_set_assets/utils.py +++ b/app/tasks/raster_tile_set_assets/utils.py @@ -225,3 +225,33 @@ async def create_resample_job( parents=[parent.job_name for parent in parents] if parents else None, **kwargs, ) + +async def create_unify_projection_job( + dataset: str, + old_source_uris: List[str], + target_prefix: str, + target_crs: str, + job_name: str, + callback: Callback +): + """ + """ + + command = [ + "unify_projection.sh", + "--target_crs", + target_crs, + ] + + for s in old_source_uris: + command.extend(["--source", s]) + + command.extend(["--target", target_prefix]) + + return PixETLJob( + dataset=dataset, + job_name=job_name, + command=command, + environment=JOB_ENV, + callback=callback, + ) diff --git a/batch/scripts/get_arguments.sh b/batch/scripts/get_arguments.sh index 067bd24d8..216cc68a5 100755 --- a/batch/scripts/get_arguments.sh +++ b/batch/scripts/get_arguments.sh @@ -201,6 +201,11 @@ do shift # past argument shift # past value ;; + --target_crs) + TARGET_CRS="$2" + shift # past argument + shift # past value + ;; --target_bucket) TARGET_BUCKET="$2" shift # past argument diff --git a/batch/scripts/resample.sh b/batch/scripts/resample.sh index 7ecdc0557..e27596de6 100644 --- a/batch/scripts/resample.sh +++ b/batch/scripts/resample.sh @@ -6,7 +6,7 @@ set -e # -d | --dataset # -v | --version # -s | --source -# -r | --resampling_method) +# -r | --resampling_method # --zoom_level # -T | --target diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh new file mode 100644 index 000000000..4e7aca458 --- /dev/null +++ b/batch/scripts/unify_projection.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +set -e + +# requires arguments +# -s | --source +# -T | --target +# --target_crs + +ME=$(basename "$0") +. 
get_arguments.sh "$@" + +echo "Reproject to a common CRS" + +# Build an array of arguments to pass to unify_projection.py +#ARG_ARRAY=("--source" "${SRC}") +# +#ARG_ARRAY+=("--target" "${TARGET}") +# +#ARG_ARRAY+=("--target-crs" "${TARGET_CRS}") + +# Run unify_projection.py with the array of arguments +#unify_projection.py "${ARG_ARRAY[@]}" + +#gdist="gs://earthenginepartners-hansen/DIST-ALERT" + +# files="" +# for i in {2..60}; do files="$files $i.tif"; done +# Skip 01.tif, 59.tif, and 60.tif for now (problems around the date line) +files="02.tif 03.tif 04.tif 05.tif 06.tif 07.tif 08.tif 09.tif 10.tif 11.tif 12.tif 13.tif 14.tif 15.tif 16.tif 17.tif 18.tif 19.tif 20.tif 21.tif 22.tif 23.tif 24.tif 25.tif 26.tif 27.tif 28.tif 29.tif 30.tif 31.tif 32.tif 33.tif 34.tif 35.tif 36.tif 37.tif 38.tif 39.tif 40.tif 41.tif 42.tif 43.tif 44.tif 45.tif 46.tif 47.tif 48.tif 49.tif 50.tif 51.tif 52.tif 53.tif 54.tif 55.tif 56.tif 57.tif 58.tif" + +src_count = 0 + +cd /tmp +rm -f /tmp/*tif* +for s in ${SRC}; do + for f in ${files}; do + remote_target_file=${TARGET}/SRC_${src_count}/${f} + if aws s3 ls ${remote_target_file}; then + echo "Remote target file ${remote_target_file} already exists, skipping..." 
+ continue + fi + + remote_src_file=${s}/${f} + local_src_file=SRC_${src_count}/${f} + echo "Now downloading ${remote_src_file} to ${local_src_file}" + time gsutil cp ${s}/${f} ${local_src_file} + echo "Done" + + local_warped_file=REPROJECTED_${src_count}/${f} + echo "Now warping ${local_src_file} to ${local_warped_file}" + time gdalwarp ${local_src_file} ${local_warped_file} -t_srs "${TARGET_CRS}" -co COMPRESS=DEFLATE -co TILED=yes + echo "Done warping ${local_src_file} to ${local_warped_file}" + + echo "Now uploading ${local_warped_file} to ${remote_target_file}" + time aws s3 cp ${local_warped_file} ${remote_target_file} + echo "Done uploading ${local_warped_file} to ${remote_target_file}" + + echo "Finally, deleting local files ${local_src_file} and ${local_warped_file}" + rm ${local_src_file} ${local_warped_file} + done + ((count++)) +done \ No newline at end of file From ba4a1a240578904598a2eaf813e8822beeda8e5f Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Tue, 29 Oct 2024 17:15:30 -0400 Subject: [PATCH 02/28] Whitespace breaks bash var assignments? 
--- batch/scripts/unify_projection.sh | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index 4e7aca458..95995cf59 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -12,27 +12,14 @@ ME=$(basename "$0") echo "Reproject to a common CRS" -# Build an array of arguments to pass to unify_projection.py -#ARG_ARRAY=("--source" "${SRC}") -# -#ARG_ARRAY+=("--target" "${TARGET}") -# -#ARG_ARRAY+=("--target-crs" "${TARGET_CRS}") - -# Run unify_projection.py with the array of arguments -#unify_projection.py "${ARG_ARRAY[@]}" - -#gdist="gs://earthenginepartners-hansen/DIST-ALERT" - # files="" # for i in {2..60}; do files="$files $i.tif"; done # Skip 01.tif, 59.tif, and 60.tif for now (problems around the date line) files="02.tif 03.tif 04.tif 05.tif 06.tif 07.tif 08.tif 09.tif 10.tif 11.tif 12.tif 13.tif 14.tif 15.tif 16.tif 17.tif 18.tif 19.tif 20.tif 21.tif 22.tif 23.tif 24.tif 25.tif 26.tif 27.tif 28.tif 29.tif 30.tif 31.tif 32.tif 33.tif 34.tif 35.tif 36.tif 37.tif 38.tif 39.tif 40.tif 41.tif 42.tif 43.tif 44.tif 45.tif 46.tif 47.tif 48.tif 49.tif 50.tif 51.tif 52.tif 53.tif 54.tif 55.tif 56.tif 57.tif 58.tif" -src_count = 0 +src_count=0 cd /tmp -rm -f /tmp/*tif* for s in ${SRC}; do for f in ${files}; do remote_target_file=${TARGET}/SRC_${src_count}/${f} From a5a5c8ef1774df51363dcdc4eb93dcc1827e6e95 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Wed, 30 Oct 2024 14:39:08 -0400 Subject: [PATCH 03/28] Run unify projection step in parallel --- batch/scripts/_warp_and_upload.sh | 30 ++++++++++++++++++++++++++++++ batch/scripts/unify_projection.sh | 27 +++++++-------------------- 2 files changed, 37 insertions(+), 20 deletions(-) create mode 100644 batch/scripts/_warp_and_upload.sh diff --git a/batch/scripts/_warp_and_upload.sh b/batch/scripts/_warp_and_upload.sh new file mode 100644 index 000000000..064f8aafb --- /dev/null +++ 
b/batch/scripts/_warp_and_upload.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# arguments: +# $0 - The name of this script +# $1 - remote_src_file +# $2 - local_src_file +# $3 - local_warped_file +# $4 - target_crs +# $5 - remote target file + +if aws s3 ls "$1"; then + echo "Remote target file $1 already exists, skipping..." + exit 0 +fi + +echo "Now downloading $1 to $2" +time gsutil cp "$1" "$2" +echo "Done downloading $1 to $2" + +echo "Now warping $2 to $3" +time gdalwarp "$2" "$3" -t_srs "$4" -co COMPRESS=DEFLATE -co TILED=yes +echo "Done warping $2 to $3" + +echo "Now uploading $3 to $5" +time aws s3 cp "$3" "$5" +echo "Done uploading $3 to $5" + +echo "Finally, deleting local files $2 and $3" +rm "$2" "$3" +echo "Done deleting local files $2 and $3" diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index 95995cf59..e1a429a98 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -19,32 +19,19 @@ files="02.tif 03.tif 04.tif 05.tif 06.tif 07.tif 08.tif 09.tif 10.tif 11.tif 12. src_count=0 +CMD_ARGS=() + cd /tmp for s in ${SRC}; do for f in ${files}; do - remote_target_file=${TARGET}/SRC_${src_count}/${f} - if aws s3 ls ${remote_target_file}; then - echo "Remote target file ${remote_target_file} already exists, skipping..." 
- continue - fi - remote_src_file=${s}/${f} local_src_file=SRC_${src_count}/${f} - echo "Now downloading ${remote_src_file} to ${local_src_file}" - time gsutil cp ${s}/${f} ${local_src_file} - echo "Done" - local_warped_file=REPROJECTED_${src_count}/${f} - echo "Now warping ${local_src_file} to ${local_warped_file}" - time gdalwarp ${local_src_file} ${local_warped_file} -t_srs "${TARGET_CRS}" -co COMPRESS=DEFLATE -co TILED=yes - echo "Done warping ${local_src_file} to ${local_warped_file}" - - echo "Now uploading ${local_warped_file} to ${remote_target_file}" - time aws s3 cp ${local_warped_file} ${remote_target_file} - echo "Done uploading ${local_warped_file} to ${remote_target_file}" + remote_target_file=${TARGET}/SRC_${src_count}/${f} - echo "Finally, deleting local files ${local_src_file} and ${local_warped_file}" - rm ${local_src_file} ${local_warped_file} + CMD_ARGS+=("${remote_src_file}" "$local_src_file" "$local_warped_file" "$TARGET_CRS" "$remote_target_file") done ((count++)) -done \ No newline at end of file +done + +echo $CMD_ARGS | xargs -n 5 -P 32 _warp_and_upload.sh From 44540cb159292bebbfdc2d8a703557c641962456 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Wed, 30 Oct 2024 17:54:10 -0400 Subject: [PATCH 04/28] Fix expansion of CMD_ARGS --- batch/scripts/unify_projection.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index e1a429a98..a721a011b 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -34,4 +34,4 @@ for s in ${SRC}; do ((count++)) done -echo $CMD_ARGS | xargs -n 5 -P 32 _warp_and_upload.sh +echo "${CMD_ARGS[@]}" | xargs -n 5 -P 32 _warp_and_upload.sh From a8b3d34fb60a179eb767a4cee71d89006a3c923f Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Wed, 30 Oct 2024 19:09:39 -0400 Subject: [PATCH 05/28] Fix count/src_count mistake; insert possibly unnecessary brackets --- batch/scripts/unify_projection.sh 
| 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index a721a011b..b28f68df7 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -21,7 +21,6 @@ src_count=0 CMD_ARGS=() -cd /tmp for s in ${SRC}; do for f in ${files}; do remote_src_file=${s}/${f} @@ -29,9 +28,9 @@ for s in ${SRC}; do local_warped_file=REPROJECTED_${src_count}/${f} remote_target_file=${TARGET}/SRC_${src_count}/${f} - CMD_ARGS+=("${remote_src_file}" "$local_src_file" "$local_warped_file" "$TARGET_CRS" "$remote_target_file") + CMD_ARGS+=("${remote_src_file}" "${local_src_file}" "${local_warped_file}" "${TARGET_CRS}" "${remote_target_file}") done - ((count++)) + ((src_count++)) done echo "${CMD_ARGS[@]}" | xargs -n 5 -P 32 _warp_and_upload.sh From 16a2cffa3e4207f4017764db13bc64faa4891c62 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Wed, 30 Oct 2024 21:45:43 -0400 Subject: [PATCH 06/28] Echo commands for debugging; fix ref to wrong arg in helper --- batch/scripts/_warp_and_upload.sh | 4 ++-- batch/scripts/unify_projection.sh | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/batch/scripts/_warp_and_upload.sh b/batch/scripts/_warp_and_upload.sh index 064f8aafb..bf928a9c8 100644 --- a/batch/scripts/_warp_and_upload.sh +++ b/batch/scripts/_warp_and_upload.sh @@ -8,8 +8,8 @@ # $4 - target_crs # $5 - remote target file -if aws s3 ls "$1"; then - echo "Remote target file $1 already exists, skipping..." +if aws s3 ls "$5"; then + echo "Remote target file $5 already exists, skipping..." 
exit 0 fi diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index b28f68df7..ab8627eea 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e +set -x # requires arguments # -s | --source From 686c8718fcaafabb3bf4bd364eda0105ed7047ae Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Wed, 30 Oct 2024 22:49:30 -0400 Subject: [PATCH 07/28] Fix counting srcs --- batch/scripts/unify_projection.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index ab8627eea..fb92617e1 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -31,7 +31,7 @@ for s in ${SRC}; do CMD_ARGS+=("${remote_src_file}" "${local_src_file}" "${local_warped_file}" "${TARGET_CRS}" "${remote_target_file}") done - ((src_count++)) + src_count=$(($src_count+1)) done echo "${CMD_ARGS[@]}" | xargs -n 5 -P 32 _warp_and_upload.sh From f8cafbdd604cd3d1fa62865d3ee724e09b769c67 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Thu, 31 Oct 2024 00:07:27 -0400 Subject: [PATCH 08/28] Switch to GDAL2TilesJob for access to awscli (installed in that container image) --- app/tasks/raster_tile_set_assets/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/tasks/raster_tile_set_assets/utils.py b/app/tasks/raster_tile_set_assets/utils.py index 5fae9b21f..3aea4338e 100644 --- a/app/tasks/raster_tile_set_assets/utils.py +++ b/app/tasks/raster_tile_set_assets/utils.py @@ -7,7 +7,7 @@ from app.models.enum.assets import AssetType from app.models.enum.pixetl import ResamplingMethod from app.models.pydantic.creation_options import PixETLCreationOptions -from app.models.pydantic.jobs import GDALDEMJob, Job, PixETLJob +from app.models.pydantic.jobs import GDALDEMJob, Job, PixETLJob, GDAL2TilesJob from app.settings.globals import ( AWS_GCS_KEY_SECRET_ARN, DEFAULT_JOB_DURATION, 
@@ -248,7 +248,7 @@ async def create_unify_projection_job( command.extend(["--target", target_prefix]) - return PixETLJob( + return GDAL2TilesJob( dataset=dataset, job_name=job_name, command=command, From 1b4dd608e058e1a994dd33fd9319cda02ebd4d92 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Thu, 31 Oct 2024 10:45:44 -0400 Subject: [PATCH 09/28] Bump GDAL container version for gsutils --- batch/gdal-python.dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/gdal-python.dockerfile b/batch/gdal-python.dockerfile index 773b430b8..c2d4a5986 100644 --- a/batch/gdal-python.dockerfile +++ b/batch/gdal-python.dockerfile @@ -1,4 +1,4 @@ -FROM globalforestwatch/data-api-gdal:v1.2.1 +FROM globalforestwatch/data-api-gdal:v1.2.2 # Copy scripts COPY ./batch/scripts/ /opt/scripts/ From 92b7c96a6b204a1ae6f66f964c3aff5f4ac4ab02 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Thu, 31 Oct 2024 12:51:17 -0400 Subject: [PATCH 10/28] Fix unify script by pre-making dirs --- batch/scripts/_warp_and_upload.sh | 2 ++ batch/scripts/unify_projection.sh | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/batch/scripts/_warp_and_upload.sh b/batch/scripts/_warp_and_upload.sh index bf928a9c8..b59fb54f9 100644 --- a/batch/scripts/_warp_and_upload.sh +++ b/batch/scripts/_warp_and_upload.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + # arguments: # $0 - The name of this script # $1 - remote_src_file diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index fb92617e1..21f306abc 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -1,7 +1,6 @@ #!/bin/bash set -e -set -x # requires arguments # -s | --source @@ -23,6 +22,9 @@ src_count=0 CMD_ARGS=() for s in ${SRC}; do + mkdir -p "SRC_${src_count}" + mkdir -p "REPROJECTED_${src_count}" + for f in ${files}; do remote_src_file=${s}/${f} local_src_file=SRC_${src_count}/${f} From e311629f46c326d5585ff967132078cd3870940c Mon Sep 17 
00:00:00 2001 From: Daniel Mannarino Date: Thu, 31 Oct 2024 15:24:43 -0400 Subject: [PATCH 11/28] Fix only 1st src being used, I think --- batch/scripts/unify_projection.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index 21f306abc..aa2d62182 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -21,7 +21,7 @@ src_count=0 CMD_ARGS=() -for s in ${SRC}; do +for s in ${SRC[@]}; do mkdir -p "SRC_${src_count}" mkdir -p "REPROJECTED_${src_count}" From 8690d5c12f0cd0a916637109e4705698e3f6f697 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Thu, 31 Oct 2024 15:41:43 -0400 Subject: [PATCH 12/28] Allow for AWS source URIs --- batch/scripts/_warp_and_upload.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/batch/scripts/_warp_and_upload.sh b/batch/scripts/_warp_and_upload.sh index b59fb54f9..5968734ee 100644 --- a/batch/scripts/_warp_and_upload.sh +++ b/batch/scripts/_warp_and_upload.sh @@ -16,7 +16,11 @@ if aws s3 ls "$5"; then fi echo "Now downloading $1 to $2" -time gsutil cp "$1" "$2" +if [[ $1 == gs://* ]]; then + time gsutil cp "$1" "$2" +elif [[ $1 == s3://* ]]; then + time aws s3 cp --no-progress "$1" "$2" +fi echo "Done downloading $1 to $2" echo "Now warping $2 to $3" @@ -24,7 +28,7 @@ time gdalwarp "$2" "$3" -t_srs "$4" -co COMPRESS=DEFLATE -co TILED=yes echo "Done warping $2 to $3" echo "Now uploading $3 to $5" -time aws s3 cp "$3" "$5" +time aws s3 cp --no-progress "$3" "$5" echo "Done uploading $3 to $5" echo "Finally, deleting local files $2 and $3" From 6bbb9d0262e9592ff6e7dd86fcf2bbdcf05ec594 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Thu, 31 Oct 2024 15:42:58 -0400 Subject: [PATCH 13/28] Removing time commands --- batch/scripts/_warp_and_upload.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/batch/scripts/_warp_and_upload.sh b/batch/scripts/_warp_and_upload.sh 
index 5968734ee..d694c89e7 100644 --- a/batch/scripts/_warp_and_upload.sh +++ b/batch/scripts/_warp_and_upload.sh @@ -17,18 +17,18 @@ fi echo "Now downloading $1 to $2" if [[ $1 == gs://* ]]; then - time gsutil cp "$1" "$2" + gsutil cp "$1" "$2" elif [[ $1 == s3://* ]]; then - time aws s3 cp --no-progress "$1" "$2" + aws s3 cp --no-progress "$1" "$2" fi echo "Done downloading $1 to $2" echo "Now warping $2 to $3" -time gdalwarp "$2" "$3" -t_srs "$4" -co COMPRESS=DEFLATE -co TILED=yes +gdalwarp "$2" "$3" -t_srs "$4" -co COMPRESS=DEFLATE -co TILED=yes echo "Done warping $2 to $3" echo "Now uploading $3 to $5" -time aws s3 cp --no-progress "$3" "$5" +aws s3 cp --no-progress "$3" "$5" echo "Done uploading $3 to $5" echo "Finally, deleting local files $2 and $3" From 59e46d71fee0e3488c253109461c2b2ae62de690 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Thu, 31 Oct 2024 16:42:28 -0400 Subject: [PATCH 14/28] Change 'original' prefix to 'reprojected' --- app/tasks/raster_tile_set_assets/raster_tile_set_assets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py b/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py index 6e34b4563..1fa3cba15 100644 --- a/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py +++ b/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py @@ -76,7 +76,7 @@ async def raster_tile_set_asset( new_src_uris = list() for i,_ in enumerate(creation_options.source_uri): new_src_uris.append( - f"s3://{DATA_LAKE_BUCKET}/{dataset}/{version}/raster/{target_crs}/original/SRC_{i}" + f"s3://{DATA_LAKE_BUCKET}/{dataset}/{version}/raster/{target_crs}/reprojected/SRC_{i}" ) target_prefix = new_src_uris[0].rsplit("/", 1)[0] jobs.append( From 5460f3807b3e66cf6390f777b60c8d2ced06cd72 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Thu, 31 Oct 2024 16:49:40 -0400 Subject: [PATCH 15/28] Recursively pre-dl files --- batch/scripts/_warp_and_upload.sh | 39 
++++++++++++------------------- batch/scripts/unify_projection.sh | 39 ++++++++++++++++++++----------- 2 files changed, 40 insertions(+), 38 deletions(-) diff --git a/batch/scripts/_warp_and_upload.sh b/batch/scripts/_warp_and_upload.sh index d694c89e7..1ee99723e 100644 --- a/batch/scripts/_warp_and_upload.sh +++ b/batch/scripts/_warp_and_upload.sh @@ -4,33 +4,24 @@ set -e # arguments: # $0 - The name of this script -# $1 - remote_src_file -# $2 - local_src_file -# $3 - local_warped_file -# $4 - target_crs -# $5 - remote target file +# $1 - local_src_file +# $2 - local_warped_file +# $3 - target_crs +# $4 - remote target file -if aws s3 ls "$5"; then - echo "Remote target file $5 already exists, skipping..." +if aws s3 ls "$4"; then + echo "Remote target file $4 already exists, skipping..." exit 0 fi -echo "Now downloading $1 to $2" -if [[ $1 == gs://* ]]; then - gsutil cp "$1" "$2" -elif [[ $1 == s3://* ]]; then - aws s3 cp --no-progress "$1" "$2" -fi -echo "Done downloading $1 to $2" - -echo "Now warping $2 to $3" -gdalwarp "$2" "$3" -t_srs "$4" -co COMPRESS=DEFLATE -co TILED=yes -echo "Done warping $2 to $3" +echo "Now warping $1 to $2" +gdalwarp "$1" "$2" -t_srs "$3" -co COMPRESS=DEFLATE -co TILED=yes +echo "Done warping $1 to $2" -echo "Now uploading $3 to $5" -aws s3 cp --no-progress "$3" "$5" -echo "Done uploading $3 to $5" +echo "Now uploading $2 to $4" +aws s3 cp --no-progress "$2" "$4" +echo "Done uploading $2 to $4" -echo "Finally, deleting local files $2 and $3" -rm "$2" "$3" -echo "Done deleting local files $2 and $3" +echo "Finally, deleting local files $1 and $2" +rm "$1" "$2" +echo "Done deleting local files $1 and $2" diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index aa2d62182..cc4f53f61 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -12,27 +12,38 @@ ME=$(basename "$0") echo "Reproject to a common CRS" -# files="" -# for i in {2..60}; do files="$files $i.tif"; done -# 
Skip 01.tif, 59.tif, and 60.tif for now (problems around the date line) -files="02.tif 03.tif 04.tif 05.tif 06.tif 07.tif 08.tif 09.tif 10.tif 11.tif 12.tif 13.tif 14.tif 15.tif 16.tif 17.tif 18.tif 19.tif 20.tif 21.tif 22.tif 23.tif 24.tif 25.tif 26.tif 27.tif 28.tif 29.tif 30.tif 31.tif 32.tif 33.tif 34.tif 35.tif 36.tif 37.tif 38.tif 39.tif 40.tif 41.tif 42.tif 43.tif 44.tif 45.tif 46.tif 47.tif 48.tif 49.tif 50.tif 51.tif 52.tif 53.tif 54.tif 55.tif 56.tif 57.tif 58.tif" - src_count=0 - CMD_ARGS=() for s in ${SRC[@]}; do - mkdir -p "SRC_${src_count}" - mkdir -p "REPROJECTED_${src_count}" + source_dir="SRC_${src_count}" + mkdir -p "$source_dir" + + echo "Now recursively downloading $s to $source_dir" + if [[ $s == gs://* ]]; then + gsutil cp -m -r "$s" "$source_dir" + elif [[ $s == s3://* ]]; then + aws s3 cp --no-progress "$s" "$source_dir" + fi + echo "Done downloading $s to $source_dir" + + reprojected_dir="REPROJECTED_${src_count}" + mkdir -p "$reprojected_dir" + + cd $source_dir + for d in $((tree -dfi)); do + mkdir -p "../${reprojected_dir}/${d}" + done - for f in ${files}; do - remote_src_file=${s}/${f} - local_src_file=SRC_${src_count}/${f} - local_warped_file=REPROJECTED_${src_count}/${f} - remote_target_file=${TARGET}/SRC_${src_count}/${f} + for f in $((find . -iname "*.tif")); do + local_src_file="${source_dir}/${f}" + local_warped_file="${reprojected_dir}/${f}" + remote_target_file="${TARGET}/SRC_${src_count}/${f}" - CMD_ARGS+=("${remote_src_file}" "${local_src_file}" "${local_warped_file}" "${TARGET_CRS}" "${remote_target_file}") + CMD_ARGS+=("${local_src_file}" "${local_warped_file}" "${TARGET_CRS}" "${remote_target_file}") done + cd .. 
+ src_count=$(($src_count+1)) done From 3455576b34215483c5a289605f4754f64b318b7c Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Thu, 31 Oct 2024 23:54:47 -0400 Subject: [PATCH 16/28] Fix gsutil command; fix aws dl command to be recursive --- batch/scripts/unify_projection.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index cc4f53f61..02d6dd530 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -21,9 +21,9 @@ for s in ${SRC[@]}; do echo "Now recursively downloading $s to $source_dir" if [[ $s == gs://* ]]; then - gsutil cp -m -r "$s" "$source_dir" + gsutil -m cp -r "$s" "$source_dir" elif [[ $s == s3://* ]]; then - aws s3 cp --no-progress "$s" "$source_dir" + aws s3 cp --recursive --no-progress "$s" "$source_dir" fi echo "Done downloading $s to $source_dir" From 6e6838ab824387d678471b85a1d8951ee855baa5 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Fri, 1 Nov 2024 00:06:43 -0400 Subject: [PATCH 17/28] Set unify proj job to parent of pixetl job --- .../raster_tile_set_assets.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py b/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py index 1fa3cba15..8381615bc 100644 --- a/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py +++ b/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py @@ -71,6 +71,7 @@ async def raster_tile_set_asset( jobs: List[Job] = list() callback: Callback = callback_constructor(asset_id) + unify_job: Job | None = None if creation_options.unify_projection: target_crs = "epsg:4326" new_src_uris = list() @@ -79,18 +80,25 @@ async def raster_tile_set_asset( f"s3://{DATA_LAKE_BUCKET}/{dataset}/{version}/raster/{target_crs}/reprojected/SRC_{i}" ) target_prefix = new_src_uris[0].rsplit("/", 1)[0] - jobs.append( - await create_unify_projection_job( 
- dataset, creation_options.source_uri, target_prefix, target_crs, "unify_projection", callback - ) + unify_job = await create_unify_projection_job( + dataset, + creation_options.source_uri, + target_prefix, + target_crs, + "unify_projection", + callback ) - + jobs.append(unify_job) creation_options.source_uri = new_src_uris - jobs.append( await create_pixetl_job( - dataset, version, creation_options, "create_raster_tile_set", callback + dataset, + version, + creation_options, + "create_raster_tile_set", + callback, + [unify_job] if unify_job is not None else None, ) ) From 18fc7c8309b66b328a7fce7500fbb2cf04d592ab Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Fri, 1 Nov 2024 10:01:57 -0400 Subject: [PATCH 18/28] Remove double parens --- batch/scripts/unify_projection.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index 02d6dd530..dd68b7c59 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -31,11 +31,11 @@ for s in ${SRC[@]}; do mkdir -p "$reprojected_dir" cd $source_dir - for d in $((tree -dfi)); do + for d in $(tree -dfi); do mkdir -p "../${reprojected_dir}/${d}" done - for f in $((find . -iname "*.tif")); do + for f in $(find . 
-iname "*.tif"); do local_src_file="${source_dir}/${f}" local_warped_file="${reprojected_dir}/${f}" remote_target_file="${TARGET}/SRC_${src_count}/${f}" From 71a3c2a37b97925a6236a001a94bd1b27efaa701 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Fri, 1 Nov 2024 10:55:11 -0400 Subject: [PATCH 19/28] Remove ./ from paths --- batch/scripts/unify_projection.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index dd68b7c59..0d8621a8f 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -35,7 +35,7 @@ for s in ${SRC[@]}; do mkdir -p "../${reprojected_dir}/${d}" done - for f in $(find . -iname "*.tif"); do + for f in $(find . -iname "*.tif"| sed 's/.\///'); do local_src_file="${source_dir}/${f}" local_warped_file="${reprojected_dir}/${f}" remote_target_file="${TARGET}/SRC_${src_count}/${f}" From d0c56d8394c706a6928bcb98897d6ea1feb04f0b Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Fri, 1 Nov 2024 16:24:37 -0400 Subject: [PATCH 20/28] Replace tree (not in image) with find --- batch/scripts/unify_projection.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index 0d8621a8f..1f2be75ff 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -31,7 +31,7 @@ for s in ${SRC[@]}; do mkdir -p "$reprojected_dir" cd $source_dir - for d in $(tree -dfi); do + for d in $(find . 
-type d | sed 's/.\///'); do mkdir -p "../${reprojected_dir}/${d}" done From 10b7765c2b001d6edd8f03487d6be0e44f39912b Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Sat, 2 Nov 2024 19:25:45 -0400 Subject: [PATCH 21/28] Fix grabbing 5 args with xargs when should have grabbed 4 --- batch/scripts/unify_projection.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/batch/scripts/unify_projection.sh b/batch/scripts/unify_projection.sh index 1f2be75ff..89c29dbbf 100644 --- a/batch/scripts/unify_projection.sh +++ b/batch/scripts/unify_projection.sh @@ -15,7 +15,7 @@ echo "Reproject to a common CRS" src_count=0 CMD_ARGS=() -for s in ${SRC[@]}; do +for s in "${SRC[@]}"; do source_dir="SRC_${src_count}" mkdir -p "$source_dir" @@ -30,7 +30,7 @@ for s in ${SRC[@]}; do reprojected_dir="REPROJECTED_${src_count}" mkdir -p "$reprojected_dir" - cd $source_dir + cd "${source_dir}" for d in $(find . -type d | sed 's/.\///'); do mkdir -p "../${reprojected_dir}/${d}" done @@ -47,4 +47,4 @@ for s in ${SRC[@]}; do src_count=$(($src_count+1)) done -echo "${CMD_ARGS[@]}" | xargs -n 5 -P 32 _warp_and_upload.sh +echo "${CMD_ARGS[@]}" | xargs -n 4 -P 32 _warp_and_upload.sh From 6634b8ff93ed8e80462bd1ccf2e38e99f16551ba Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Sat, 2 Nov 2024 21:48:08 -0400 Subject: [PATCH 22/28] re-center TIFFs that cross the dateline --- batch/scripts/_tiff_crosses_dateline.sh | 42 +++++++++++++++++++++++++ batch/scripts/_warp_and_upload.sh | 13 +++++++- 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100755 batch/scripts/_tiff_crosses_dateline.sh diff --git a/batch/scripts/_tiff_crosses_dateline.sh b/batch/scripts/_tiff_crosses_dateline.sh new file mode 100755 index 000000000..b6f183c6f --- /dev/null +++ b/batch/scripts/_tiff_crosses_dateline.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# +# Small Script to check if input raster will +# cross dateline when converting to EPSG:4326 +# +# USAGE: ./crosses_dateline.sh infile [outfile] 
+# +# if no outfile is given, the script returns "true" or "false" +# +# Needs gdal 2.0+ and Python +# +# Credit: Slightly modified from https://gis.stackexchange.com/a/222341 + + +if [ -z "${1}" ]; then + echo -e "Error: No input rasterfile given.\n> USAGE: ./crosses_dateline.sh infile" + exit 1 +fi + +# Get information, save it to variable as we need it several times +gdalinfo=$(gdalinfo "${1}" -json) + +# If -json switch is not available exit! +if [ ! -z $(echo $gdalinfo | grep "^Usage:") ]; then + echo -e "Error: GDAL command failed, Version 2.0+ is needed" + exit 1 +fi + +function jsonq { + echo "${1}" | python -c "import json,sys; jdata = sys.stdin.read(); data = json.loads(jdata); print(data${2});" +} + +ulx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][0][0]") +llx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][1][0]") +lrx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][3][0]") +urx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][2][0]") + +crossing_dateline=false +test $(echo "${ulx}>${lrx}" | bc) -eq 1 && crossing_dateline=true +test $(echo "${llx}>${urx}" | bc) -eq 1 && crossing_dateline=true + +echo -n "${crossing_dateline}" \ No newline at end of file diff --git a/batch/scripts/_warp_and_upload.sh b/batch/scripts/_warp_and_upload.sh index 1ee99723e..39ce16193 100644 --- a/batch/scripts/_warp_and_upload.sh +++ b/batch/scripts/_warp_and_upload.sh @@ -14,8 +14,19 @@ if aws s3 ls "$4"; then exit 0 fi +warp_options=("-co COMPRESS=DEFLATE" "-co TILED=yes") + +echo "Seeing if TIFF crosses the dateline" +crosses="$(./_tiff_crosses_dateline.sh $1)" +if [ "${crosses}" = "true" ]; then + echo "$1 crosses the dateline" + warp_options+=("--config CENTER_LONG 180") +else + echo "$1 does not cross the dateline" +fi + echo "Now warping $1 to $2" -gdalwarp "$1" "$2" -t_srs "$3" -co COMPRESS=DEFLATE -co TILED=yes +gdalwarp "$1" "$2" -t_srs "$3" "${warp_options[@]}" echo "Done warping $1 to $2" echo "Now uploading $2 to $4" From 
98f481608badc91e1ba4a565c1fcb00cc6d994cb Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Sat, 2 Nov 2024 22:49:42 -0400 Subject: [PATCH 23/28] Possibly fix running _tiff_crosses_dateline.sh --- batch/scripts/_warp_and_upload.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/scripts/_warp_and_upload.sh b/batch/scripts/_warp_and_upload.sh index 39ce16193..98e3f47d9 100644 --- a/batch/scripts/_warp_and_upload.sh +++ b/batch/scripts/_warp_and_upload.sh @@ -17,7 +17,7 @@ fi warp_options=("-co COMPRESS=DEFLATE" "-co TILED=yes") echo "Seeing if TIFF crosses the dateline" -crosses="$(./_tiff_crosses_dateline.sh $1)" +crosses="$(_tiff_crosses_dateline.sh $1)" if [ "${crosses}" = "true" ]; then echo "$1 crosses the dateline" warp_options+=("--config CENTER_LONG 180") From e7614ae1a87ae52d913f08f6290e916dcd89735f Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Sun, 3 Nov 2024 01:33:48 -0500 Subject: [PATCH 24/28] Replace bc with Python; hopefully fix gdalwarp arg complaints --- batch/scripts/_tiff_crosses_dateline.sh | 4 ++-- batch/scripts/_warp_and_upload.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/batch/scripts/_tiff_crosses_dateline.sh b/batch/scripts/_tiff_crosses_dateline.sh index b6f183c6f..68995c486 100755 --- a/batch/scripts/_tiff_crosses_dateline.sh +++ b/batch/scripts/_tiff_crosses_dateline.sh @@ -36,7 +36,7 @@ lrx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][3][0]") urx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][2][0]") crossing_dateline=false -test $(echo "${ulx}>${lrx}" | bc) -eq 1 && crossing_dateline=true -test $(echo "${llx}>${urx}" | bc) -eq 1 && crossing_dateline=true +test $(python -c "print(${ulx}>${lrx})") = True && crossing_dateline=true +test $(python -c "print(${ulx}>${lrx})") = True && crossing_dateline=true echo -n "${crossing_dateline}" \ No newline at end of file diff --git a/batch/scripts/_warp_and_upload.sh b/batch/scripts/_warp_and_upload.sh index 
98e3f47d9..fd7295aec 100644 --- a/batch/scripts/_warp_and_upload.sh +++ b/batch/scripts/_warp_and_upload.sh @@ -14,13 +14,13 @@ if aws s3 ls "$4"; then exit 0 fi -warp_options=("-co COMPRESS=DEFLATE" "-co TILED=yes") +warp_options=("-co" "COMPRESS=DEFLATE" "-co" "TILED=yes") echo "Seeing if TIFF crosses the dateline" crosses="$(_tiff_crosses_dateline.sh $1)" if [ "${crosses}" = "true" ]; then echo "$1 crosses the dateline" - warp_options+=("--config CENTER_LONG 180") + warp_options+=("--config" "CENTER_LONG" "180") else echo "$1 does not cross the dateline" fi From 459d90d871ec79ca955e41d21d180a9032dfbb55 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Sun, 3 Nov 2024 12:17:15 -0500 Subject: [PATCH 25/28] Fix prefix name (epsg:4326 -> epsg-4326) to be consistent --- app/tasks/raster_tile_set_assets/raster_tile_set_assets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py b/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py index 8381615bc..87ef0fb87 100644 --- a/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py +++ b/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py @@ -77,7 +77,8 @@ async def raster_tile_set_asset( new_src_uris = list() for i,_ in enumerate(creation_options.source_uri): new_src_uris.append( - f"s3://{DATA_LAKE_BUCKET}/{dataset}/{version}/raster/{target_crs}/reprojected/SRC_{i}" + f"s3://{DATA_LAKE_BUCKET}/{dataset}/{version}/raster/" + f"{target_crs.replace(':', '-')}/reprojected/SRC_{i}" ) target_prefix = new_src_uris[0].rsplit("/", 1)[0] unify_job = await create_unify_projection_job( From c4b4a27bae50c20461aa8b4ce955a627cbe4fefd Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Tue, 5 Nov 2024 14:38:24 -0500 Subject: [PATCH 26/28] Improve docs in response to Dan's PR comments --- .../raster_tile_set_assets.py | 2 +- app/tasks/raster_tile_set_assets/utils.py | 9 +++++++-- batch/scripts/_tiff_crosses_dateline.sh | 16 +++++++--------- 
3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py b/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py index 87ef0fb87..795896757 100644 --- a/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py +++ b/app/tasks/raster_tile_set_assets/raster_tile_set_assets.py @@ -71,7 +71,7 @@ async def raster_tile_set_asset( jobs: List[Job] = list() callback: Callback = callback_constructor(asset_id) - unify_job: Job | None = None + unify_job: Optional[Job] = None if creation_options.unify_projection: target_crs = "epsg:4326" new_src_uris = list() diff --git a/app/tasks/raster_tile_set_assets/utils.py b/app/tasks/raster_tile_set_assets/utils.py index 3aea4338e..b6b3069f0 100644 --- a/app/tasks/raster_tile_set_assets/utils.py +++ b/app/tasks/raster_tile_set_assets/utils.py @@ -233,8 +233,13 @@ async def create_unify_projection_job( target_crs: str, job_name: str, callback: Callback -): - """ +) -> GDAL2TilesJob: + """Creates a Batch job that takes all files indicated in old_source_uris + and re-projects each to a common CRS, then places them in a mirror of the + original directory structure under the target_prefix, divided by source + number. More specifically, the files from the first source URI will be + put at <target_prefix>/SRC_0, the files from the second under + <target_prefix>/SRC_1, and so on. 
""" command = [ diff --git a/batch/scripts/_tiff_crosses_dateline.sh b/batch/scripts/_tiff_crosses_dateline.sh index 68995c486..32cd50af5 100755 --- a/batch/scripts/_tiff_crosses_dateline.sh +++ b/batch/scripts/_tiff_crosses_dateline.sh @@ -1,26 +1,24 @@ #!/bin/bash # -# Small Script to check if input raster will -# cross dateline when converting to EPSG:4326 +# USAGE: _tiff_crosses_dateline.sh raster_file # -# USAGE: ./crosses_dateline.sh infile [outfile] +# Prints the string "true" if the input raster will cross the dateline +# when converting to EPSG:4326, "false" otherwise # -# if no outfile is given, the script returns "true" or "false" -# -# Needs gdal 2.0+ and Python +# Needs GDAL 2.0+ and Python # # Credit: Slightly modified from https://gis.stackexchange.com/a/222341 if [ -z "${1}" ]; then - echo -e "Error: No input rasterfile given.\n> USAGE: ./crosses_dateline.sh infile" + echo -e "Error: No input raster file given.\n> USAGE: _tiff_crosses_dateline.sh raster_file" exit 1 fi -# Get information, save it to variable as we need it several times +# Get raster info, save it to a variable as we need it several times gdalinfo=$(gdalinfo "${1}" -json) -# If -json switch is not available exit! +# Exit if -json switch is not available if [ ! 
-z $(echo $gdalinfo | grep "^Usage:") ]; then echo -e "Error: GDAL command failed, Version 2.0+ is needed" exit 1 From 036eab51d7a455876525545dffcbbce4c3dca5ac Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Thu, 7 Nov 2024 11:55:09 -0500 Subject: [PATCH 27/28] Fix copy/paste error --- batch/scripts/_tiff_crosses_dateline.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/scripts/_tiff_crosses_dateline.sh b/batch/scripts/_tiff_crosses_dateline.sh index 32cd50af5..f13405199 100755 --- a/batch/scripts/_tiff_crosses_dateline.sh +++ b/batch/scripts/_tiff_crosses_dateline.sh @@ -35,6 +35,6 @@ urx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][2][0]") crossing_dateline=false test $(python -c "print(${ulx}>${lrx})") = True && crossing_dateline=true -test $(python -c "print(${ulx}>${lrx})") = True && crossing_dateline=true +test $(python -c "print(${llx}>${urx})") = True && crossing_dateline=true echo -n "${crossing_dateline}" \ No newline at end of file From a009eab346ac3b7e3b66085c9691c905e2969683 Mon Sep 17 00:00:00 2001 From: Solomon Negusse Date: Mon, 11 Nov 2024 14:21:24 +0300 Subject: [PATCH 28/28] clear cloudfront caches --- app/routes/datasets/versions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app/routes/datasets/versions.py b/app/routes/datasets/versions.py index d25175b5e..50449b7f2 100644 --- a/app/routes/datasets/versions.py +++ b/app/routes/datasets/versions.py @@ -177,6 +177,7 @@ async def update_version( AssetType.dynamic_vector_tile_cache, AssetType.static_vector_tile_cache, AssetType.raster_tile_cache, + AssetType.cog, ], )