Commit

latest atl24 cleanup merged

jpswinski committed Oct 22, 2024
2 parents e06dd34 + ca68256 commit d8c1767
Showing 19 changed files with 543 additions and 424 deletions.
71 changes: 43 additions & 28 deletions clients/python/sliderule/earthdata.py
@@ -234,29 +234,30 @@ def __cmr_collection_query(provider, short_name):
    return search_results['feed']['entry']

def __cmr_query(provider, short_name, version, time_start, time_end, **kwargs):
-   """Perform a scrolling CMR query for files matching input criteria."""
+   """Perform a search-after CMR query for files matching input criteria."""
    kwargs.setdefault('polygon',None)
    kwargs.setdefault('name_filter',None)
    kwargs.setdefault('return_metadata',False)
    # build params
    params = '&short_name={0}'.format(short_name)
    if version != None:
        params += '&version={0}'.format(version)
-   if time_start != None and time_end != None:
+   if time_start is not None and time_end is not None:
        params += '&temporal[]={0},{1}'.format(time_start, time_end)
    if kwargs['polygon']:
        params += '&polygon={0}'.format(kwargs['polygon'])
    if kwargs['name_filter']:
        params += '&options[producer_granule_id][pattern]=true'
        params += '&producer_granule_id[]=' + kwargs['name_filter']

    CMR_URL = 'https://cmr.earthdata.nasa.gov'
    cmr_query_url = ('{0}/search/granules.json?provider={1}'
                     '&sort_key[]=start_date&sort_key[]=producer_granule_id'
-                    '&scroll=true&page_size={2}'.format(CMR_URL, provider, CMR_PAGE_SIZE))
+                    '&page_size={2}'.format(CMR_URL, provider, CMR_PAGE_SIZE))
    cmr_query_url += params
-   logger.debug('cmr request={0}\n'.format(cmr_query_url))
+   logger.debug(f'Initial CMR request: {cmr_query_url}')

-   cmr_scroll_id = None
+   cmr_search_after = None
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
@@ -266,15 +267,18 @@ def __cmr_query(provider, short_name, version, time_start, time_end, **kwargs):
    metadata = sliderule.emptyframe()
    while True:
        req = urllib.request.Request(cmr_query_url)
-       if cmr_scroll_id:
-           req.add_header('cmr-scroll-id', cmr_scroll_id)
+       if cmr_search_after:
+           req.add_header('CMR-Search-After', cmr_search_after)
+           logger.debug(f'Requesting next page with CMR-Search-After: {cmr_search_after}')

        response = urllib.request.urlopen(req, context=ctx)
-       if not cmr_scroll_id:
-           # Python 2 and 3 have different case for the http headers
-           headers = {k.lower(): v for k, v in dict(response.info()).items()}
-           cmr_scroll_id = headers['cmr-scroll-id']
+       headers = {k.lower(): v for k, v in dict(response.info()).items()}
+       cmr_search_after = headers.get('cmr-search-after')

        search_page = response.read()
        search_page = json.loads(search_page.decode('utf-8'))

        url_scroll_results = __cmr_filter_urls(search_page, DATASETS[short_name]["formats"])
        if not url_scroll_results:
            break
@@ -284,10 +288,22 @@ def __cmr_query(provider, short_name, version, time_start, time_end, **kwargs):
            metadata_results = __cmr_granule_metadata(search_page)
        else:
            metadata_results = geopandas.pd.DataFrame([None for _ in url_scroll_results])

        # append granule metadata
        metadata = geopandas.pd.concat([metadata, metadata_results])

-   return (urls,metadata)
+       # Two ways to determine that there is no more data available:
+       # 1. The number of granules in the current response is less than the requested 'page_size'
+       # 2. The absence of the 'CMR-Search-After' header
+       result_count = len(search_page['feed']['entry'])
+       if result_count < CMR_PAGE_SIZE:
+           logger.debug(f'Received {result_count} results, fewer than page size. Ending pagination after processing.')
+           break
+       if not cmr_search_after:
+           logger.debug('No CMR-Search-After header found, no more pages.')
+           break
+
+   return urls, metadata

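The loop above is driven by two things: echoing the CMR-Search-After response header back on the next request, and stopping on a short page or a missing header. A minimal standalone sketch of that pattern, assuming the public CMR endpoint, a placeholder provider and short_name, and no SSL tweaking or error handling:

```python
import json
import urllib.request

CMR_URL = 'https://cmr.earthdata.nasa.gov'
PAGE_SIZE = 2000  # stands in for CMR_PAGE_SIZE in earthdata.py

def cmr_granules(short_name, provider='NSIDC_CPRD'):  # placeholder provider
    """Yield granule entries page by page using CMR-Search-After."""
    url = (f'{CMR_URL}/search/granules.json?provider={provider}'
           f'&short_name={short_name}&page_size={PAGE_SIZE}')
    search_after = None
    while True:
        req = urllib.request.Request(url)
        if search_after:
            # echo the header from the previous response to get the next page
            req.add_header('CMR-Search-After', search_after)
        with urllib.request.urlopen(req) as rsps:
            # header names are case-insensitive; normalize before lookup
            headers = {k.lower(): v for k, v in rsps.getheaders()}
            search_after = headers.get('cmr-search-after')
            entries = json.loads(rsps.read().decode('utf-8'))['feed']['entry']
        yield from entries
        # stop on a short page or when CMR omits the header
        if len(entries) < PAGE_SIZE or not search_after:
            break
```

Unlike the deprecated scroll API, search-after keeps no server-side session; each response hands back an opaque cursor in the header, so the loop carries no state beyond that value.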
###############################################################################
# CMR UTILITIES
@@ -389,7 +405,12 @@ def __cmr_max_version(provider, short_name):
#
def __build_geojson(rsps):
    geojson = rsps.json()
-   del geojson["links"]
+   next = None
+   if "links" in geojson:
+       for link in geojson["links"]:
+           if link["rel"] == "next":
+               next = link["href"]
+       del geojson["links"]
    if 'numberMatched' in geojson:
        del geojson['numberMatched']
    if 'numberReturned' in geojson:
@@ -410,7 +431,7 @@ def __build_geojson(rsps):
                if "href" in assetsDict[val]:
                    propertiesDict[val] = assetsDict[val]["href"]
            del geojson["features"][i]["assets"]
-   return geojson
+   return geojson, next

#
# Perform a STAC Query
@@ -450,22 +471,16 @@ def __stac_search(provider, short_name, collections, polygons, time_start, time_
    # make initial stac request
    data = context.post(url, data=json.dumps(rqst), headers=headers)
    data.raise_for_status()
-   geojson = __build_geojson(data)
-
-   # iterate through additional pages if not all returned
-   num_returned = geojson["context"]["returned"]
-   num_matched = geojson["context"]["matched"]
-   if num_matched > max_requested_resources:
-       logger.warn("Number of matched resources truncated from {} to {}".format(num_matched, max_requested_resources))
-       num_matched = max_requested_resources
-   num_pages = int((num_matched + (num_returned - 1)) / num_returned)
-   for page in range(2, num_pages+1):
-       rqst["page"] = page
-       data = context.post(url, data=json.dumps(rqst), headers=headers)
+   geojson, next_link = __build_geojson(data)
+
+   # Continue fetching pages if 'next' link is available
+   while next_link:
+       data = context.get(next_link, headers=headers)
        data.raise_for_status()
-       _geojson = __build_geojson(data)
+       _geojson, next_link = __build_geojson(data)
        geojson["features"] += _geojson["features"]
-   geojson["context"]["returned"] = num_matched
+
+   geojson["context"]["returned"] = len(geojson["features"])
    geojson["context"]["limit"] = max_requested_resources

    # return geojson dictionary
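With __build_geojson now returning each page's "next" link, __stac_search no longer estimates a page count from context["matched"]; it simply follows links until the server stops providing them. A rough sketch of that link-driven loop using the requests library, assuming a generic STAC /search endpoint (the sliderule session object, headers, and the max_requested_resources cap are elided):

```python
import requests

def stac_search_all(url, rqst):
    """Collect features from every page by following STAC 'next' links."""
    def next_link(geojson):
        # a STAC page advertises its successor as a link with rel == 'next'
        return next((lnk['href'] for lnk in geojson.get('links', [])
                     if lnk.get('rel') == 'next'), None)

    rsps = requests.post(url, json=rqst)
    rsps.raise_for_status()
    page = rsps.json()
    features = page['features']
    link = next_link(page)
    while link:
        rsps = requests.get(link)
        rsps.raise_for_status()
        page = rsps.json()
        features += page['features']
        link = next_link(page)
    return features
```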
6 changes: 4 additions & 2 deletions clients/python/tests/test_gedi.py
@@ -79,7 +79,8 @@ def test_gedi(self, init):
        gdf = icesat2.atl06p(parms, resources=[resource])
        assert init
        assert gdf.describe()["gedi.time"]["std"] == 0.0
-       assert abs(gdf.describe()["gedi.value"]["mean"] - 3143.5934365441703) < 0.001
+       # assert abs(gdf.describe()["gedi.value"]["mean"] - 3143.5934365441703) < 0.001
+       assert abs(gdf.describe()["gedi.value"]["mean"] - 3142.8683679064293) < 0.001
        assert gdf.describe()["gedi.file_id"]["max"] == 0.0
        assert gdf.describe()["gedi.flags"]["max"] == 0.0

@@ -136,4 +137,5 @@ def test_gedi(self, init):
            assert key in gdf.keys()
        assert abs(gdf.describe()["canopy_openness"]["max"] - 10.390829086303711) < 0.001
        df = gdf[gdf["gedi.value"] > -9999.0]
-       assert abs(sum(df["gedi.value"]) - 42767.289459228516) < 0.001
+       # assert abs(sum(df["gedi.value"]) - 4168.20367060658032) < 0.001
+       assert abs(sum(df["gedi.value"]) - 42555.52866346482) < 0.001
12 changes: 12 additions & 0 deletions clients/python/tests/test_landsat.py
@@ -23,6 +23,18 @@ def test_samples(self, init):
        assert init
        assert len(rsps) > 0

+   def test_cmr_stac(self, init):
+       time_start = "2000-01-01T00:00:00Z"
+       time_end = "2022-02-01T23:59:59Z"
+       polygon = [ {"lon": -177.0000000001, "lat": 51.0000000001},
+                   {"lon": -179.0000000001, "lat": 51.0000000001},
+                   {"lon": -179.0000000001, "lat": 49.0000000001},
+                   {"lon": -177.0000000001, "lat": 49.0000000001},
+                   {"lon": -177.0000000001, "lat": 51.0000000001} ]
+       catalog = earthdata.stac(short_name="HLS", polygon=polygon, time_start=time_start, time_end=time_end, as_str=True)
+       assert len(catalog) >= 6359
+
+
    def test_subset1(self, init):
        time_start = "2021-01-01T00:00:00Z"
        time_end = "2021-02-01T23:59:59Z"
28 changes: 20 additions & 8 deletions datasets/landsat/package/LandsatHlsRaster.cpp
@@ -143,7 +143,7 @@ void LandsatHlsRaster::getIndexFile(const OGRGeometry* geo, std::string& file, c
    static_cast<void>(geo);
    static_cast<void>(points);
    file = indexFile;
-   mlog(DEBUG, "Using %s", file.c_str());
+   // mlog(DEBUG, "Using %s", file.c_str());
}


Expand Down Expand Up @@ -344,7 +344,7 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr
assert(ur);

/* Get the sample for this point from unique raster */
for(const point_sample_t& ps : ur->pointSamples)
for(point_sample_t& ps : ur->pointSamples)
{
if(ps.pointIndex == pointIndx)
{
@@ -360,12 +360,24 @@ uint32_t LandsatHlsRaster::_getGroupSamples(sample_mode_t mode, const rasters_gr
                    const bool returnBandSample = it->second;
                    if(returnBandSample)
                    {
-                       /* Create a copy of the sample and add it to the list */
-                       RasterSample* sample = new RasterSample(*ps.sample);
-
-                       /* Set flags for this sample */
-                       sample->flags = flags;
-                       slist->add(sample);
+                       RasterSample* s;
+                       if(!ps.sampleReturned.exchange(true))
+                       {
+                           s = ps.sample;
+                       }
+                       else
+                       {
+                           /* Sample has already been returned, must create a copy */
+                           s = new RasterSample(*ps.sample);
+                       }
+
+                       /* Set time for this sample */
+                       s->time = rgroup->gpsTime / 1000;
+
+                       /* Set flags for this sample, add it to the list */
+                       s->flags = flags;
+                       slist->add(s);
                        errors |= ps.ssErrors;
                    }
                }
                errors |= ps.ssErrors;
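The LandsatHlsRaster change above avoids an unconditional deep copy of every RasterSample: the first caller to flip the atomic sampleReturned flag takes the original pointer, and only later callers pay for a copy. A loose Python illustration of that return-once-then-copy idea (names here are illustrative only; the committed C++ relies on std::atomic<bool>::exchange rather than a lock):

```python
import copy
import threading

class PointSample:
    """Hold one sample; hand out the original once, copies afterwards."""
    def __init__(self, sample):
        self.sample = sample
        self._returned = False
        self._lock = threading.Lock()  # stands in for std::atomic<bool>

    def take(self):
        with self._lock:
            first, self._returned = not self._returned, True
        # first caller gets the original object, later callers get a copy
        return self.sample if first else copy.deepcopy(self.sample)
```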
9 changes: 8 additions & 1 deletion datasets/landsat/selftests/landsat_reader.lua
@@ -323,6 +323,12 @@ runner.check(sampleCnt == 180)
print(string.format("POI sample time: %.2f (%d threads)", stoptime - starttime, sampleCnt))


+--[[
+EL - we are currently not using subsetting and this test is very slow (180 rasters to subset)
+- I am disabling it for now. There are other tests (shorter) which subset rasters.
+- I am leaving this here in case we want to re-enable it later.
+
print(string.format("\n-------------------------------------------------\nLandsat AOI Subset test\n-------------------------------------------------"))
-- AOI extent (extent of hls_trimmed.geojson)
@@ -367,6 +373,7 @@ if tbl ~= nil then
        runner.check(size > 0)
    end
end
+--]]



@@ -440,7 +447,7 @@ for i=1, maxSamples do
    sampleCnt = sampleCnt + 1
end
local stoptime = time.latch();
-print(string.format("POI sample %d points time: %.2f (%d threads)", sampleCnt, stoptime - starttime, threadCnt))
+print(string.format("POI sample %d points time: %.2f", sampleCnt, stoptime - starttime))
runner.check(sampleCnt == maxSamples)
dem = nil

20 changes: 10 additions & 10 deletions datasets/landsat/selftests/plugin_unittest.lua
@@ -22,28 +22,28 @@ while not aws.csget("lpdaac-cloud") do
end


-local geojsonfile = td.."../data/hls_trimmed.geojson"
+local geojsonfile = td.."../data/grand_mesa.geojson"
local f = io.open(geojsonfile, "r")
local contents = f:read("*all")
f:close()

-- Unit Test --

-local lon = -179.0
-local lat = 51.0
-
-local lon_incr = 0.01
-local lat_incr = 0.00
-local pointCount = 100
-
print(string.format("\n-------------------------------------------------\nLandsat Plugin test (NDVI)\n-------------------------------------------------"))
local demType = "landsat-hls"
-local dem = geo.raster(geo.parms({ asset = demType, algorithm = "NearestNeighbour", radius = 0, bands = {"NDVI"}, catalog=contents, sort_by_index=true }))
+local t0str = "2022:01:05:00:00:00"
+local t1str = "2022:01:15:00:00:00"
+local dem = geo.raster(geo.parms({ asset = demType, algorithm = "NearestNeighbour", radius = 0, t0=t0str, t1=t1str, bands = {"NDVI"}, catalog = contents, sort_by_index = true }))
runner.check(dem ~= nil)

local ut = geo.ut_sample(dem)
runner.check(ut ~= nil)
-local status = ut:test(lon, lat, lon_incr, lat_incr, pointCount)
+-- This test ignores lon, lat, lon_incr, lat_incr, pointCount as they are not used.
+-- It opens a test file with points.
+local pointsFile = td.."../data/grand_mesa_poi.txt"
+local pointsInFile = 26183 -- number of points in file
+local maxPointCount = 1000 -- number of points to sample, 1000 will trigger all threaded code
+status = ut:test(0, 0, 0, 0, maxPointCount, pointsFile);
runner.check(status, "Failed sampling test")

-- Clean Up --
2 changes: 1 addition & 1 deletion datasets/landsat/systests/grand_mesa_test.lua
@@ -72,6 +72,7 @@ for i=1,#arr do
    local lon = arr[i][1]
    local lat = arr[i][2]
    local height = 0
+   -- print(string.format("%0.16f, %0.16f", lon, lat))
    local tbl, err = dem:sample(lon, lat, height)
    if err ~= 0 then
        print(string.format("======> FAILED to read", lon, lat, height))
@@ -115,4 +116,3 @@ local dtime = stoptime - starttime
print(string.format("\nSamples: %d, wrong NDVI: %d, wrong groupID: %d", samplesCnt, badNDVICnt, badFileCnt))
print(string.format("ExecTime: %f", dtime))

-os.exit()
6 changes: 3 additions & 3 deletions datasets/opendata/selftests/worldcover_reader.lua
@@ -13,9 +13,9 @@ local assets = asset.loaddir()
-- Unit Test --

local sigma = 1.0e-9
-local lon = -108.1
-local lat = 39.1
-local height = 0.0
+local lon    = -108.1
+local lat    = 39.1
+local height = 0.0


print(string.format("\n-------------------------------------------------\nesa worldcover 10meter sample POI\n-------------------------------------------------"))
48 changes: 48 additions & 0 deletions datasets/usgs3dep/selftests/plugin_unittest.lua
@@ -0,0 +1,48 @@
+local runner = require("test_executive")
+local console = require("console")
+local asset = require("asset")
+local assets = asset.loaddir()
+local json = require("json")
+local _,td = runner.srcscript()
+
+-- console.monitor:config(core.LOG, core.DEBUG)
+-- sys.setlvl(core.LOG, core.DEBUG)
+
+-- Check If Present --
+if not core.UNITTEST then return end
+
+-- Setup --
+local assets = asset.loaddir()
+
+local geojsonfile = td.."../data/grand_mesa_1m_dem.geojson"
+local f = io.open(geojsonfile, "r")
+local contents = f:read("*all")
+f:close()
+
+-- Unit Test --
+
+print(string.format("\n-------------------------------------------------\n3dep unit test\n-------------------------------------------------"))
+local demType = "usgs3dep-1meter-dem"
+local dem = geo.raster(geo.parms({ asset = demType, algorithm = "NearestNeighbour", radius = 0, catalog = contents, sort_by_index = true }))
+runner.check(dem ~= nil)
+
+ut = geo.ut_sample(dem)
+runner.check(ut ~= nil)
+-- This test ignores lon, lat, lon_incr, lat_incr, pointCount as they are not used.
+-- It opens a test file with points.
+local pointsFile = td.."../../landsat/data/grand_mesa_poi.txt"
+print(string.format("Points file: %s", pointsFile))
+local pointsInFile = 26183 -- number of points in file
+local maxPointCount = 110
+status = ut:test(0, 0, 0, 0, maxPointCount, pointsFile);
+runner.check(status, "Failed sampling test")
+ut = nil
+
+
+
+-- Clean Up --
+
+-- Report Results --
+
+runner.report()