Commit 0f380ac
Updated code to support GEBCO 2024 dataset
elidwa committed Nov 15, 2024
1 parent d06829d commit 0f380ac
Showing 6 changed files with 217 additions and 29 deletions.
28 changes: 27 additions & 1 deletion datasets/gebco/package/GebcoBathyRaster.cpp
@@ -47,6 +47,32 @@ GebcoBathyRaster::GebcoBathyRaster(lua_State* L, RequestFields* rqst_parms, cons
filePath("/vsis3/" + std::string(parms->asset.asset->getPath())),
indexFile(parms->asset.asset->getIndex())
{
/*
* To support datasets from different years, we use the 'bands' parameter to identify which year's data to sample.
* This band parameter must be cleared in 'parms' since it does not correspond to an actual band.
*/
std::string year("2024");
if(parms->bands.length() == 0)
{
mlog(INFO, "Using latest GEBCO data from %s", year.c_str());
}
else if(parms->bands.length() == 1)
{
if(parms->bands[0] == "2023" || parms->bands[0] == "2024")
{
year = parms->bands[0];
mlog(INFO, "Using GEBCO data from %s", year.c_str());

/* Clear params->bands */
const_cast<FieldList<std::string>&>(parms->bands).clear();
}
else throw RunTimeException(CRITICAL, RTE_ERROR, "Invalid band name specified");
}
else throw RunTimeException(CRITICAL, RTE_ERROR, "Invalid number of bands specified");

/* Build data path on s3 */
filePath += "/" + year;
mlog(DEBUG, "Using data path: %s", filePath.c_str());
}

/*----------------------------------------------------------------------------
@@ -62,7 +88,7 @@ void GebcoBathyRaster::getIndexFile(const OGRGeometry* geo, std::string& file, c
static_cast<void>(geo);
static_cast<void>(points);
file = filePath + "/" + indexFile;
mlog(DEBUG, "Using %s", file.c_str());
mlog(DEBUG, "Using index file: %s", file.c_str());
}


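The year selection above is driven entirely by the raster-sampling parameters. Below is a minimal sketch of the parms a client would send, mirroring the Lua selftest later in this commit; only the "asset" and "bands" semantics are confirmed by this commit, and the surrounding request call is omitted:

    # Sketch only: raster-sampling parms selecting the GEBCO 2023 dataset.
    # The "bands" entry is interpreted as a year and cleared server-side;
    # omitting it defaults to the latest (2024) data.
    gebco_parms = {
        "asset": "gebco-bathy",
        "algorithm": "NearestNeighbour",
        "with_flags": True,
        "bands": ["2023"],
    }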
61 changes: 59 additions & 2 deletions datasets/gebco/selftests/gebco_reader.lua
@@ -15,15 +15,16 @@ local lons = {-40, -20, -120}
local lats = { 70, -20, 20}
height = 0

-local expDepth = { -64, -4933, -4072}
+-- Updated results for GEBCO 2024
+local expDepth = { -64, -4940, -4072}
local expFlags = { 70, 40, 44}

-- A depth tolerance is needed because different versions of the PROJ library
-- can return slightly different depth values.
-- We allow 1 meter of tolerance for the test to pass.
local depth_tolerance = 1;

-print(string.format("\n--------------------------------\nTest: GEBCO Correct Values\n--------------------------------"))
+print(string.format("\n--------------------------------\nTest: GEBCO Correct Values default with no band specified\n--------------------------------"))
local dem = geo.raster(geo.parms({ asset = "gebco-bathy", algorithm = "NearestNeighbour", with_flags=true, sort_by_index = true }))
runner.check(dem ~= nil)

@@ -49,6 +50,62 @@ for j, lon in ipairs(lons) do
end
end

print(string.format("\n--------------------------------\nTest: GEBCO Correct Values band=2024\n--------------------------------"))
dem = geo.raster(geo.parms({ asset = "gebco-bathy", bands = {"2024"}, algorithm = "NearestNeighbour", with_flags=true, sort_by_index = true }))
runner.check(dem ~= nil)

for j, lon in ipairs(lons) do
lat = lats[j]
tbl, err = dem:sample(lon, lat, height)
runner.check(err == 0)
runner.check(tbl ~= nil)

if err ~= 0 then
print(string.format("Point: %d, (%.3f, %.3f) ======> FAILED to read, err# %d",j, lon, lat, err))
else
local el, fname
for k, v in ipairs(tbl) do
value = v["value"]
fname = v["file"]
flags = v["flags"]
print(string.format("(%02d) (%6.1f, %5.1f) %8.1fm %02d %s", k, lon, lat, value, flags, fname))

assert( math.abs(value - expDepth[j]) < depth_tolerance, string.format("Point: %d, (%.3f, %.3f) ======> FAILED",j, lon, lat))
assert(flags == expFlags[j], string.format("Point: %d, (%.3f, %.3f) ======> FAILED",j, lon, lat))
end
end
end


-- Different results for GEBCO 2023
expDepth = { -64, -4933, -4072}

print(string.format("\n--------------------------------\nTest: GEBCO Correct Values band=2023\n--------------------------------"))
dem = geo.raster(geo.parms({ asset = "gebco-bathy", bands = {"2023"}, algorithm = "NearestNeighbour", with_flags=true, sort_by_index = true }))
runner.check(dem ~= nil)

for j, lon in ipairs(lons) do
lat = lats[j]
tbl, err = dem:sample(lon, lat, height)
runner.check(err == 0)
runner.check(tbl ~= nil)

if err ~= 0 then
print(string.format("Point: %d, (%.3f, %.3f) ======> FAILED to read, err# %d",j, lon, lat, err))
else
local el, fname
for k, v in ipairs(tbl) do
value = v["value"]
fname = v["file"]
flags = v["flags"]
print(string.format("(%02d) (%6.1f, %5.1f) %8.1fm %02d %s", k, lon, lat, value, flags, fname))

assert( math.abs(value - expDepth[j]) < depth_tolerance, string.format("Point: %d, (%.3f, %.3f) ======> FAILED",j, lon, lat))
assert(flags == expFlags[j], string.format("Point: %d, (%.3f, %.3f) ======> FAILED",j, lon, lat))
end
end
end

-- Report Results --

runner.report()
47 changes: 47 additions & 0 deletions datasets/gebco/utils/README.txt
@@ -0,0 +1,47 @@
GEBCO Data Processing Guide

The GEBCO data is not hosted on AWS. Users must download the compressed files and run the provided scripts to create COGs and a GeoJSON index file.
Below are the steps to perform this process. In these examples, the year "2024" is used, but the Python scripts support the "year" parameter for other years as well.

*** Create Directory Structure ***
Create the following directories with the specified year in the path:

/data/GEBCO/2024/TIFS
/data/GEBCO/2024/COGS
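For example, both directories can be created in one step (assuming the /data root used throughout this guide):
>> mkdir -p /data/GEBCO/2024/TIFS /data/GEBCO/2024/COGS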

*** Download the GEBCO Data ***
Go to: https://www.gebco.net/data_and_products/gridded_bathymetry_data/#a1
Find the table entry "GEBCO_2024 Grid (sub-ice topo/bathy)" download DataGeoTiff
It should be named something like: gebco_2024_sub_ice_topo_geotiff.zip

Find the table entry "GEBCO_2024 TID Grid" download DataGeoTiff
It should be named something like: gebco_2024_tid_geotiff.zip

Uncompress both files into: /data/GEBCO/2024/TIFS
>> unzip gebco_2024_sub_ice_topo_geotiff.zip -d /data/GEBCO/2024/TIFS
>> unzip gebco_2024_tid_geotiff.zip -d /data/GEBCO/2024/TIFS


*** Run the Python Scripts ***
From the utils directory where the scripts are located, run the following scripts in the specified order.
Make sure to include the year as a parameter.

Step 1: Run the script cog_maker.py with the year as a parameter
>> python cog_maker.py 2024

Step 2: Run the script index_maker.py with the year as a parameter
>> python index_maker.py 2024


*** Upload to Sliderule S3 Bucket ***
In the Sliderule public bucket (s3://sliderule/data/GEBCO/), create a new directory for the year, e.g., 2024.

Upload all COG tif files and index.geojson from /data/GEBCO/2024/COGS to: s3://sliderule/data/GEBCO/2024
>> aws s3 cp . s3://sliderule/data/GEBCO/2024/ --recursive



Upload the original downloaded data files to: s3://sliderule/data/GEBCO/2024
>> aws s3 cp gebco_2024_sub_ice_topo_geotiff.zip s3://sliderule/data/GEBCO/2024/gebco_2024_sub_ice_topo_geotiff.zip
>> aws s3 cp gebco_2024_tid_geotiff.zip s3://sliderule/data/GEBCO/2024/gebco_2024_tid_geotiff.zip
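
After both uploads, the bucket layout should look roughly like this (illustrative; the 2023 directory follows the same pattern from the previous release):

s3://sliderule/data/GEBCO/
    2023/   cog_*.tif files, index.geojson
    2024/   cog_*.tif files, index.geojson, original .zip archives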

59 changes: 49 additions & 10 deletions datasets/gebco/utils/cog_maker.py
@@ -1,25 +1,37 @@
import os
-from osgeo import gdal, org
+import sys
+from osgeo import gdal

# Enable GDAL exceptions to avoid future warnings
gdal.UseExceptions()

# Set GDAL configurations for large cache and multithreading
gdal.SetConfigOption("GDAL_CACHEMAX", "1024") # 1024 MB cache
gdal.SetConfigOption("GDAL_NUM_THREADS", "ALL_CPUS") # Use all available CPU cores

-def convert_geotiffs_to_cogs(tiffs_dir, cogs_directory):
+def convert_geotiffs_to_cogs(year, tiffs_dir, cogs_dir):
# Empty the COGS directory if it exists
-    if os.path.exists(cogs_directory):
-        for file in os.listdir(cogs_directory):
-            os.remove(os.path.join(cogs_directory, file))
+    if os.path.exists(cogs_dir):
+        for file in os.listdir(cogs_dir):
+            os.remove(os.path.join(cogs_dir, file))
     else:
-        os.makedirs(cogs_directory)
+        os.makedirs(cogs_dir)

# List of GeoTIFF files to convert to COGs
geotiff_files = [f for f in os.listdir(tiffs_dir) if f.endswith(".tif")]

# tif files have year in their name, example of 2023 tif file:
# gebco_2023_sub_ice_n0.0_s-90.0_w-180.0_e-90.0.tif
# Check if the year is part of the file name
for geotiff_file in geotiff_files:
if year not in geotiff_file:
print(f"Error: File '{geotiff_file}' does not contain the year '{year}' in its name.")
sys.exit(1)

# Convert each GeoTIFF to a Cloud-Optimized GeoTIFF
for geotiff_file in geotiff_files:
input_path = os.path.join(tiffs_dir, geotiff_file)
-        output_cog_path = os.path.join(cogs_directory, f"cog_{geotiff_file}")
+        output_cog_path = os.path.join(cogs_dir, f"cog_{geotiff_file}")
gdal.Translate(
output_cog_path,
input_path,
@@ -38,7 +50,34 @@ def convert_geotiffs_to_cogs(tiffs_dir, cogs_directory):
# Convert original tif files to COGs
# https://www.gebco.net/data_and_products/gridded_bathymetry_data/#area

-tifs_directory = "/data/GEBCO/2023/TIFS"
-cogs_directory = "/data/GEBCO/2023/COGS"
-convert_geotiffs_to_cogs(tifs_directory, cogs_directory)
if __name__ == "__main__":
# List of supported years
supported_years = ["2023", "2024"]

if len(sys.argv) != 2:
print("Usage: python cog_maker.py <year>")
sys.exit(1)

# Get the year from the command line argument
year = sys.argv[1]

# Ensure the provided year is valid
if year not in supported_years:
print(f"Error: Invalid year. Supported years are: {', '.join(supported_years)}")
sys.exit(1)

# Compose directory paths based on the year
tifs_dir = f"/data/GEBCO/{year}/TIFS"
cogs_dir = f"/data/GEBCO/{year}/COGS"

# Check if the directories exist
if not os.path.exists(tifs_dir):
print(f"Error: Directory '{tifs_dir}' does not exist.")
sys.exit(1)

if not os.path.exists(cogs_dir):
print(f"Error: Directory '{cogs_dir}' does not exist.")
sys.exit(1)

# Call the conversion function
convert_geotiffs_to_cogs(year, tifs_dir, cogs_dir)
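
The exact Translate options are collapsed in the diff above. For reference, a minimal stand-alone sketch of the same GDAL COG conversion (output path and creation options here are illustrative assumptions, not necessarily the script's exact settings; requires GDAL >= 3.1 for the COG driver):

    from osgeo import gdal

    gdal.UseExceptions()

    def make_cog(input_path, output_path):
        # Convert a plain GeoTIFF into a Cloud-Optimized GeoTIFF
        gdal.Translate(
            output_path,
            input_path,
            format="COG",
            creationOptions=["COMPRESS=DEFLATE", "NUM_THREADS=ALL_CPUS"],
        )

    make_cog("/data/GEBCO/2024/TIFS/example.tif",      # hypothetical input
             "/data/GEBCO/2024/COGS/cog_example.tif")  # hypothetical output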
49 changes: 34 additions & 15 deletions datasets/gebco/utils/index_maker.py
@@ -1,19 +1,22 @@
import os
import sys
from osgeo import gdal, ogr

# Enable GDAL exceptions to avoid future warnings
gdal.UseExceptions()

-def create_geojson(directory_path, geojson_name):
-    if not os.path.exists(directory_path):
-        raise ValueError(f"The directory '{directory_path}' does not exist.")
+def create_geojson(dir_path, index_file):
+    if not os.path.exists(dir_path):
+        raise ValueError(f"The directory '{dir_path}' does not exist.")

# Find all .tif files in the directory
-    tif_files = [os.path.join(directory_path, f) for f in os.listdir(directory_path) if f.endswith(".tif")]
+    tif_files = [os.path.join(dir_path, f) for f in os.listdir(dir_path) if f.endswith(".tif")]

if not tif_files:
raise ValueError("No .vrt files found in the specified directory.")
raise ValueError(f"No .tif files found in directory: {dir_path}")

# Set the output path for the geojson file in the same directory
-    geojson_output_path = os.path.join(directory_path, geojson_name)
+    geojson_output_path = os.path.join(dir_path, index_file)

# Remove if it already exists
if os.path.exists(geojson_output_path):
@@ -48,7 +51,7 @@ def create_geojson(directory_path, geojson_name):
if '_tid_' in tif_file:
continue

print(f"TIF File: {tif_file}")
# print(f"Processed: {tif_file}")

# Get the geotransform and size of the raster
geotransform = raster_ds.GetGeoTransform()
@@ -80,9 +83,9 @@ def create_geojson(directory_path, geojson_name):
feature.SetField("id", id)
id += 1

# Set the "datetime" attribute to the middle of 2023
middle_of_2023 = "2023-07-02T00:00:00Z"
feature.SetField("datetime", middle_of_2023)
# Set the "datetime" attribute to the middle of the specified year
middle_of_year = f"{year}-07-02T00:00:00Z"
feature.SetField("datetime", middle_of_year)

# Remove the directory path from the tif file name
tif_file = os.path.basename(tif_file)
@@ -101,11 +104,27 @@


geojson_ds.Destroy() # Close and save the GeoJSON file
print(f"GeoJSON created at: {geojson_output_path}")
print(f"GeoJSON index file created at: {geojson_output_path}")


# Make index geojson for tif files
-cogs_directory = "/data/GEBCO/2023/COGS"
-geojson_name = "index.geojson"
-create_geojson(cogs_directory, geojson_name)
if __name__ == "__main__":
# List of supported years
supported_years = ["2023", "2024"]

if len(sys.argv) != 2:
print("Usage: python index_maker.py <year>")
sys.exit(1)

# Get the year from the command line argument
year = sys.argv[1]

# Ensure the provided year is valid
if year not in supported_years:
print(f"Error: Invalid year. Supported years are: {', '.join(supported_years)}")
sys.exit(1)

# Compose directory paths based on the year
cogs_dir = f"/data/GEBCO/{year}/COGS"
index_file = "index.geojson"

create_geojson(cogs_dir, index_file)
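
For reference, the footprint geometry the index stores for each COG comes straight from the raster's geotransform. A minimal stand-alone sketch of that bounding-box derivation (file path is hypothetical; assumes a north-up raster, which GEBCO tiles are):

    from osgeo import gdal, ogr

    gdal.UseExceptions()

    ds = gdal.Open("/data/GEBCO/2024/COGS/cog_example.tif")  # hypothetical file
    gt = ds.GetGeoTransform()
    cols, rows = ds.RasterXSize, ds.RasterYSize

    # Corner coordinates from the geotransform; gt[5] is negative for north-up rasters
    minx, maxy = gt[0], gt[3]
    maxx = gt[0] + cols * gt[1]
    miny = gt[3] + rows * gt[5]

    # Build the bounding-box polygon used as the feature geometry
    ring = ogr.Geometry(ogr.wkbLinearRing)
    for x, y in [(minx, maxy), (maxx, maxy), (maxx, miny), (minx, miny), (minx, maxy)]:
        ring.AddPoint(x, y)
    footprint = ogr.Geometry(ogr.wkbPolygon)
    footprint.AddGeometry(ring)
    print(footprint.ExportToWkt())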
2 changes: 1 addition & 1 deletion targets/slideruleearth-aws/asset_directory.csv
@@ -19,7 +19,7 @@ swot-sim-glorys, podaac-cloud, s3, podaac-ops-cumulus-prote
usgs3dep-1meter-dem, nil, nil, /vsis3/prd-tnm, nil, us-west-2, https://s3.us-west-2.amazonaws.com
esa-worldcover-10meter, nil, nil, /vsis3/esa-worldcover/v200/2021/map, /vsis3/sliderule/data/WORLDCOVER/ESA_WorldCover_10m_2021_v200_Map.vrt, eu-central-1, https://s3.eu-central-1.amazonaws.com
meta-globalcanopy-1meter, nil, nil, /vsis3/dataforgood-fb-data/forests/v1/alsgedi_global_v6_float/, /vsis3/sliderule/data/GLOBALCANOPY/META_GlobalCanopyHeight_1m_2024_v1.vrt, us-east-1, https://s3.us-east-1.amazonaws.com
-gebco-bathy, iam-role, s3, sliderule/data/GEBCO/2023, index.geojson, us-west-2, https://s3.us-west-2.amazonaws.com
+gebco-bathy, iam-role, s3, sliderule/data/GEBCO, index.geojson, us-west-2, https://s3.us-west-2.amazonaws.com
bluetopo-bathy, iam-role, s3, /vsis3/noaa-ocs-nationalbathymetry-pds/BlueTopo/, _BlueTopo_Tile_Scheme, us-east-1, https://s3.us-east-1.amazonaws.com
landsat-hls, lpdaac-cloud, nil, /vsis3/lp-prod-protected, nil, us-west-2, https://s3.us-west-2.amazonaws.com
arcticdem-mosaic, nil, nil, /vsis3/pgc-opendata-dems/arcticdem/mosaics/v4.1, 2m_dem_tiles.vrt, us-west-2, https://s3.us-west-2.amazonaws.com
