From 9bf3b35308395444d2688875d94fd925e1eca56e Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Thu, 29 Aug 2024 14:52:08 -0700 Subject: [PATCH 1/2] Ingest COGs directly into GEE --- batch/python/export_to_gee.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/batch/python/export_to_gee.py b/batch/python/export_to_gee.py index 235fa0d64..ba930b37e 100644 --- a/batch/python/export_to_gee.py +++ b/batch/python/export_to_gee.py @@ -48,10 +48,7 @@ def upload_cog_to_gcs(dataset, implementation): return f"gs://{GCS_BUCKET}/{dataset}/{implementation}.tif" -def create_cog_backed_asset(dataset, implementation, gcs_path, service_account): - credentials = ee.ServiceAccountCredentials(service_account, GCS_CREDENTIALS_FILE) - ee.Initialize(credentials) - +def create_cog_backed_asset(dataset, implementation, gcs_path, credentials): # delete any existing asset with the same dataset/implementation try: ee.data.deleteAsset(f"projects/{EE_PROJECT}/assets/{dataset}/{implementation}") @@ -84,6 +81,22 @@ def create_cog_backed_asset(dataset, implementation, gcs_path, service_account): f"GEE returned unexpected status code {response.status_code} with payload {response.content}" ) + return asset_id + + +def ingest_in_gee(dataset, implementation, gcs_path): + """Ingest directly into GEE as a best effort task.""" + asset_id = f"{dataset}/{implementation}" + request_id = ee.data.newTaskId()[0] + params = { + "name": f"projects/{EE_PROJECT}/assets/{asset_id}", + "tilesets": [{"sources": [{"uris": [gcs_path]}]}], + } + ee.data.startIngestion(request_id=request_id, params=params) + return asset_id + + +def set_acl_to_anyone_read(asset_id): # update ACL to be public full_asset_id = f"projects/{EE_PROJECT}/assets/{asset_id}" acl = ee.data.getAssetAcl(full_asset_id) @@ -96,8 +109,14 @@ def export_to_gee( implementation: str = Option(..., help="Implementation name."), ): service_account = set_google_application_credentials() + + # initialize GEE + credentials = ee.ServiceAccountCredentials(service_account, GCS_CREDENTIALS_FILE) + ee.Initialize(credentials) + gcs_path = upload_cog_to_gcs(dataset, implementation) - create_cog_backed_asset(dataset, implementation, gcs_path, service_account) + asset_id = (dataset, implementation, gcs_path, service_account) + set_acl_to_anyone_read(asset_id) if __name__ == "__main__": From 87f51576afa29411c684a2832a477901bfbe079a Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Fri, 27 Sep 2024 16:21:59 -0700 Subject: [PATCH 2/2] Fix issues in script --- batch/python/export_to_gee.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/batch/python/export_to_gee.py b/batch/python/export_to_gee.py index ba930b37e..302839a6f 100644 --- a/batch/python/export_to_gee.py +++ b/batch/python/export_to_gee.py @@ -86,6 +86,22 @@ def create_cog_backed_asset(dataset, implementation, gcs_path, credentials): def ingest_in_gee(dataset, implementation, gcs_path): """Ingest directly into GEE as a best effort task.""" + # delete any existing asset with the same dataset/implementation + try: + ee.data.deleteAsset(f"projects/{EE_PROJECT}/assets/{dataset}/{implementation}") + except ee.EEException: + # asset doesn't exist + pass + + # create dataset folder if it doesn't exist + try: + ee.data.createAsset( + {"type": "Folder"}, f"projects/{EE_PROJECT}/assets/{dataset}" + ) + except ee.EEException: + # folder already exists + pass + asset_id = f"{dataset}/{implementation}" request_id = ee.data.newTaskId()[0] params = { @@ -115,8 +131,7 @@ def export_to_gee( ee.Initialize(credentials) gcs_path = upload_cog_to_gcs(dataset, implementation) - asset_id = (dataset, implementation, gcs_path, service_account) - set_acl_to_anyone_read(asset_id) + ingest_in_gee(dataset, implementation, gcs_path) if __name__ == "__main__":