diff --git a/Project.toml b/Project.toml index 0592f99..a0f9c27 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,12 @@ name = "SpaceLiDAR" uuid = "29bf0bfc-420f-4fa5-b441-811fd9e6e11d" authors = ["Maarten Pronk", "Deltares"] -version = "0.3.0" +version = "0.4.0" [deps] AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" +DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" @@ -28,10 +29,11 @@ TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53" [compat] AWSS3 = "0.10, 0.11" CategoricalArrays = "^0.9, 0.10" +DataAPI = "1" DataFrames = "1" Distances = "^0.10" Extents = "^0.1" -FillArrays = "1" +FillArrays = "0.2, 1" GeoFormatTypes = "0.4" GeoInterface = "1" HDF5 = "^0.15, 0.16" diff --git a/README.md b/README.md index 707225e..02cfe3d 100644 --- a/README.md +++ b/README.md @@ -14,10 +14,10 @@ Currently supports the following data products: |--- |--- |--- |--- | |ICESat| GLAH06 v34 | [UG](https://nsidc.org/sites/nsidc.org/files/MULTI-GLAH01-V033-V034-UserGuide.pdf) | [ATBD](https://eospso.nasa.gov/sites/default/files/atbd/ATBD-GLAS-02.pdf) | |ICESat| GLAH14 v34 | [UG](https://nsidc.org/sites/nsidc.org/files/MULTI-GLAH01-V033-V034-UserGuide.pdf) | [ATBD](https://eospso.nasa.gov/sites/default/files/atbd/ATBD-GLAS-02.pdf) | -|ICESat-2| ATL03 v5 | [UG](https://nsidc.org/sites/nsidc.org/files/ATL03-V005-UserGuide.pdf) | [ATBD](https://icesat-2.gsfc.nasa.gov/sites/default/files/page_files/ICESat2_ATL03_ATBD_r005.pdf) | -|ICESat-2| ATL06 v5 | [UG](https://nsidc.org/sites/nsidc.org/files/ATL03-V005-UserGuide.pdf) | [ATBD](https://icesat-2.gsfc.nasa.gov/sites/default/files/page_files/ICESat2_ATL06_ATBD_r005.pdf) | -|ICESat-2| ATL08 v5 | [UG](https://nsidc.org/sites/nsidc.org/files/ATL08-V005-UserGuide.pdf) | [ATBD](https://nsidc.org/sites/default/files/icesat2_atl08_atbd_r005_1.pdf) | -|ICESat-2| ATL12 v5 | [UG](https://nsidc.org/sites/nsidc.org/files/ATL12-V005-UserGuide.pdf) | [ATBD](https://icesat-2.gsfc.nasa.gov/sites/default/files/page_files/ICESat2_ATL12_ATBD_r005.pdf) | +|ICESat-2| ATL03 v6 | [UG](https://nsidc.org/sites/default/files/documents/user-guide/atl03-v006-userguide.pdf) | [ATBD](https://icesat-2.gsfc.nasa.gov/sites/default/files/page_files/ICESat2_ATL03_ATBD_r006.pdf) | +|ICESat-2| ATL06 v6 | [UG](https://nsidc.org/sites/default/files/documents/user-guide/atl06-v006-userguide.pdf) | [ATBD](https://icesat-2.gsfc.nasa.gov/sites/default/files/page_files/ICESat2_ATL06_ATBD_r006.pdf) | +|ICESat-2| ATL08 v6 | [UG](https://nsidc.org/sites/default/files/documents/user-guide/atl08-v006-userguide.pdf) | [ATBD](https://nsidc.org/sites/default/files/documents/technical-reference/icesat2_atl08_atbd_v006_0.pdf) | +|ICESat-2| ATL12 v6 | [UG](https://nsidc.org/sites/default/files/documents/user-guide/atl12-v006-userguide.pdf) | [ATBD](https://icesat-2.gsfc.nasa.gov/sites/default/files/page_files/ICESat2_ATL12_ATBD_r006.pdf) | |GEDI| L2A v2 | [UG](https://lpdaac.usgs.gov/documents/998/GEDI02_UserGuide_V21.pdf) | [ATBD](https://lpdaac.usgs.gov/documents/581/GEDI_WF_ATBD_v1.0.pdf) | For an overview with code examples, see the FOSS4G Pluto notebook [here](https://www.evetion.nl/SpaceLiDAR.jl/dev/tutorial/foss4g_2021.jl.html) @@ -26,7 +26,7 @@ If you use SpaceLiDAR.jl in your research, please consider [citing it](https://z # Install ```julia -] add SpaceLiDAR +]add SpaceLiDAR ``` # Usage @@ -34,12
+34,12 @@ Search for data ```julia using SpaceLiDAR using Extents -# Find all ATL08 granules +# Find all ATL08 granules ever granules = search(:ICESat2, :ATL08) # Find only ATL03 granules in a part of Vietnam vietnam = Extent(X = (102., 107.0), Y = (8.0, 12.0)) -granules = search(:ICESat2, :ATL08; bbox=vietnam, version=5) +granules = search(:ICESat2, :ATL08; extent=vietnam, version=6) # Find GEDI granules in the same way granules = search(:GEDI, :GEDI02_A) @@ -58,10 +58,10 @@ SpaceLiDAR.netrc!(username, password) # replace with your credentials fn = SpaceLiDAR.download!(granule) # You can also load a granule from disk -granule = granule_from_file(fn) +granule = SpaceLiDAR.granule(fn) # Or from a folder -local_granules = granules_from_folder(folder) +local_granules = SpaceLiDAR.granules(folder) # Instantiate search results locally (useful for GEDI location indexing) local_granules = instantiate(granules, folder) @@ -71,7 +71,7 @@ Derive points ```julia using DataFrames fn = "GEDI02_A_2019242104318_O04046_01_T02343_02_003_02_V002.h5" -g = SpaceLiDAR.granule_from_file(fn) +g = SpaceLiDAR.granule(fn) df = DataFrame(g) 149680×15 DataFrame Row │ longitude latitude height height_error datetime intensity sensitivity surface quality nmo ⋯ @@ -92,7 +92,7 @@ Derive linestrings ```julia using DataFrames fn = "ATL03_20181110072251_06520101_003_01.h5" -g = SpaceLiDAR.granule_from_file(fn) +g = SpaceLiDAR.granule(fn) tlines = DataFrame(SpaceLiDAR.lines(g, step=10000)) Table with 4 columns and 6 rows: geom sun_angle track datetime diff --git a/docs/src/changelog.md b/docs/src/changelog.md index 67229e3..c676780 100644 --- a/docs/src/changelog.md +++ b/docs/src/changelog.md @@ -1,7 +1,30 @@ -## v0.3.0 +## Unreleased -!!! danger - This is a **breaking** release +- Working on generic retrieval of parameters as a Table from a granule, instead of the hardcoded choices now made for each product. This would result in methods like `points(granule, [vara, varb])`. + +### New features +- New types `Table` and `PartitionedTable`, which support the Tables.jl interface. This removes the need for allocating code like `reduce(vcat, DataFrame.(points(granule)))` to get a `DataFrame`; you can now simply call `DataFrame(table)`. +- Reduced allocations in retrieving point data. +- Introduced `before` and `after` keywords in `search`, to search by date or date range. +- Added `reflectance` (`asr`) and `nphotons` (`n_seg_ph`) columns to ICESat-2 ATL08 output. + +### Fixed +- Empty (filtered) granules could result in `Vector{BitVector}` columns, which have been changed to `Vector{Bool}`. + +### Breaking +- `points` now returns either a `Table` or a `PartitionedTable` instead of `NamedTuple` or `Vector{NamedTuple}`. The old behaviour can be regained by calling `parent` on these tables. +- Removed `number_of_returns` and `return_number` from ICESat-2 ATL08 canopy output. + +### Deprecated +- Renamed `granule_from_file` to `granule` +- Renamed `granules_from_folder` to `granules` +- Renamed `write_granule_urls!` to `write_urls` + +### Changed +- Most of the search functionality has been moved out to the more generic [EarthData.jl](https://github.com/evetion/EarthData.jl) +- Updated ICESat-2 from version 5 to version 6 + + ## v0.3.0 - GeoInterface, Extents support - Bounding box using Extent subsetting on all `points` functions @@ -25,7 +48,7 @@ ## v0.2.0 -!!! danger +!!! warning This is a **breaking** release - Many of the column names have changed to be more descriptive.
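To make the renamed loaders and the new table types above concrete, here is a minimal sketch of the v0.4.0 workflow, assuming the granule file has already been downloaded; the ATL08 filename is hypothetical:

```julia
using SpaceLiDAR
using DataFrames
using Dates
using Extents

# Search with the new `extent`, `before` and `after` keywords.
vietnam = Extent(X = (102.0, 107.0), Y = (8.0, 12.0))
granules = search(:ICESat2, :ATL08; extent = vietnam, version = 6,
    after = DateTime(2020, 1, 1), before = DateTime(2020, 2, 1))

# `points` now returns a table type instead of (a vector of) NamedTuples.
g = SpaceLiDAR.granule("ATL08_20201121151145_08920913_006_01.h5")  # hypothetical local file
table = SpaceLiDAR.points(g)  # PartitionedTable, one partition per beam

df = DataFrame(table)  # replaces reduce(vcat, DataFrame.(points(g)))
nts = parent(table)    # regain the old Vector{NamedTuple} behaviour
```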
diff --git a/docs/src/guides/downloads.md b/docs/src/guides/downloads.md index e1b1d5e..5afabea 100644 --- a/docs/src/guides/downloads.md +++ b/docs/src/guides/downloads.md @@ -7,7 +7,7 @@ In such cases it's useful to export a list of granules to a text file and use an ```julia granules = find(:ICESat2, "ATL08") -SpaceLiDAR.write_granule_urls!("atl08_world.txt", granules) +SpaceLiDAR.write_urls("atl08_world.txt", granules) ``` In my case, I use [aria2c](https://aria2.github.io/manual/en/html/aria2c.html). diff --git a/docs/src/index.md b/docs/src/index.md index a8cab1d..1bba2a7 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -11,10 +11,10 @@ Currently supports the following data products: |--- |--- |--- |--- | |ICESat| GLAH06 v34 | [UG](https://nsidc.org/sites/nsidc.org/files/MULTI-GLAH01-V033-V034-UserGuide.pdf) | [ATBD](https://eospso.nasa.gov/sites/default/files/atbd/ATBD-GLAS-02.pdf) | |ICESat| GLAH14 v34 | [UG](https://nsidc.org/sites/nsidc.org/files/MULTI-GLAH01-V033-V034-UserGuide.pdf) | [ATBD](https://eospso.nasa.gov/sites/default/files/atbd/ATBD-GLAS-02.pdf) | -|ICESat-2| ATL03 v5 | [UG](https://nsidc.org/sites/nsidc.org/files/ATL03-V005-UserGuide.pdf) | [ATBD](https://icesat-2.gsfc.nasa.gov/sites/default/files/page_files/ICESat2_ATL03_ATBD_r005.pdf) | -|ICESat-2| ATL06 v5 | [UG](https://nsidc.org/sites/nsidc.org/files/ATL03-V005-UserGuide.pdf) | [ATBD](https://icesat-2.gsfc.nasa.gov/sites/default/files/page_files/ICESat2_ATL06_ATBD_r005.pdf) | -|ICESat-2| ATL08 v5 | [UG](https://nsidc.org/sites/nsidc.org/files/ATL08-V005-UserGuide.pdf) | [ATBD](https://nsidc.org/sites/default/files/icesat2_atl08_atbd_r005_1.pdf) | -|ICESat-2| ATL12 v5 | [UG](https://nsidc.org/sites/nsidc.org/files/ATL12-V005-UserGuide.pdf) | [ATBD](https://icesat-2.gsfc.nasa.gov/sites/default/files/page_files/ICESat2_ATL12_ATBD_r005.pdf) | +|ICESat-2| ATL03 v6 | [UG](https://nsidc.org/sites/default/files/documents/user-guide/atl03-v006-userguide.pdf) | [ATBD](https://icesat-2.gsfc.nasa.gov/sites/default/files/page_files/ICESat2_ATL03_ATBD_r006.pdf) | +|ICESat-2| ATL06 v6 | [UG](https://nsidc.org/sites/default/files/documents/user-guide/atl06-v006-userguide.pdf) | [ATBD](https://icesat-2.gsfc.nasa.gov/sites/default/files/page_files/ICESat2_ATL06_ATBD_r006.pdf) | +|ICESat-2| ATL08 v6 | [UG](https://nsidc.org/sites/default/files/documents/user-guide/atl08-v006-userguide.pdf) | [ATBD](https://nsidc.org/sites/default/files/documents/technical-reference/icesat2_atl08_atbd_v006_0.pdf) | +|ICESat-2| ATL12 v6 | [UG](https://nsidc.org/sites/default/files/documents/user-guide/atl12-v006-userguide.pdf) | [ATBD](https://icesat-2.gsfc.nasa.gov/sites/default/files/page_files/ICESat2_ATL12_ATBD_r006.pdf) | |GEDI| L2A v2 | [UG](https://lpdaac.usgs.gov/documents/998/GEDI02_UserGuide_V21.pdf) | [ATBD](https://lpdaac.usgs.gov/documents/581/GEDI_WF_ATBD_v1.0.pdf) | diff --git a/docs/src/tutorial/usage.md b/docs/src/tutorial/usage.md index 8a8dba0..e0d4b73 100644 --- a/docs/src/tutorial/usage.md +++ b/docs/src/tutorial/usage.md @@ -7,7 +7,7 @@ granules = search(:ICESat2, :ATL08) # Find only ATL03 granules in a part of Vietnam vietnam = Extent(X = (102., 107.0), Y = (8.0, 12.0)) -granules = search(:ICESat2, :ATL08; bbox=vietnam, version=5) +granules = search(:ICESat2, :ATL08; extent=vietnam, version=6) # Find GEDI granules in the same way granules = search(:GEDI, :GEDI02_A) @@ -26,10 +26,10 @@ SpaceLiDAR.netrc!(username, password) # replace with your credentials fn = SpaceLiDAR.download!(granule) # You can also load a granule
from disk -granule = granule_from_file(fn) +granule = SpaceLiDAR.granule(fn) # Or from a folder -local_granules = granules_from_folder(folder) +local_granules = SpaceLiDAR.granules(folder) # Instantiate search results locally (useful for GEDI location indexing) local_granules = instantiate(granules, folder) @@ -40,7 +40,7 @@ local_granules = instantiate(granules, folder) ```julia using DataFrames fn = "GEDI02_A_2019242104318_O04046_01_T02343_02_003_02_V002.h5" -g = SpaceLiDAR.granule_from_file(fn) +g = SpaceLiDAR.granule(fn) df = DataFrame(g) 149680×15 DataFrame Row │ longitude latitude height height_error datetime intensity sensitivity surface quality nmo ⋯ @@ -61,7 +61,7 @@ df = DataFrame(g) ```julia using DataFrames fn = "ATL03_20181110072251_06520101_003_01.h5" -g = SpaceLiDAR.granule_from_file(fn) +g = SpaceLiDAR.granule(fn) tlines = DataFrame(SpaceLiDAR.lines(g, step=10000)) Table with 4 columns and 6 rows: geom sun_angle track datetime diff --git a/src/GEDI/GEDI.jl b/src/GEDI/GEDI.jl index a849c16..5a1a8ea 100644 --- a/src/GEDI/GEDI.jl +++ b/src/GEDI/GEDI.jl @@ -35,7 +35,7 @@ function info(g::GEDI_Granule) end function gedi_info(filename) - id, _ = splitext(filename) + id, _ = splitext(basename(filename)) if endswith(id, "V002") type, name, datetime, orbit, sub_orbit, track, ppds, pge_version, revision, version = Base.split(id, "_") version = version[2:end] diff --git a/src/GEDI/L2A.jl b/src/GEDI/L2A.jl index 0bbac45..af8d62b 100644 --- a/src/GEDI/L2A.jl +++ b/src/GEDI/L2A.jl @@ -49,20 +49,27 @@ function points( :points, ) end - nts = Vector{NamedTuple}() - HDF5.h5open(granule.url, "r") do file - for track in tracks - if haskey(file, track) - for track_nt ∈ points(granule, file, track, step, bbox, ground, canopy, filtered) - if !isempty(track_nt.height) - track_nt.height[track_nt.height.==fill_value] .= NaN - end - push!(nts, track_nt) - end + nts = HDF5.h5open(granule.url, "r") do file + + # Determine number of loops over tracks and ground and/or canopy + ftracks = filter(track -> haskey(file, track), tracks) + if ground && canopy + grounds = (Bool(i % 2) for i = 1:length(ftracks)*2) + ftracks = repeat(collect(ftracks), inner = 2) + elseif ground || canopy + grounds = Base.Iterators.repeated(ground, length(ftracks)) + else + throw(ArgumentError("Choose at least one of `ground` or `canopy`")) + end + map(Tuple(zip(ftracks, grounds))) do (track, ground) + track_nt = points(granule, file, track, step, bbox, ground, canopy, filtered) + if !isempty(track_nt.height) + track_nt.height[track_nt.height.==fill_value] .= NaN end + track_nt end end - nts + PartitionedTable(nts, granule) end function points( @@ -80,40 +87,15 @@ function points( if !isnothing(bbox) # find data that falls withing bbox if ground - x_grd = read_dataset(group, "lon_lowestmode")::Vector{Float64} - y_grd = read_dataset(group, "lat_lowestmode")::Vector{Float64} - - ind = (x_grd .> bbox.X[1]) .& (y_grd .> bbox.Y[1]) .& (x_grd .< bbox.X[2]) .& (y_grd .< bbox.Y[2]) - start_grd = findfirst(ind) - stop_grd = findlast(ind) - end - - if canopy - x_can = read_dataset(group, "lon_highestreturn")::Vector{Float64} - y_can = read_dataset(group, "lat_highestreturn")::Vector{Float64} - - ind = (x_can .> bbox.X[1]) .& (y_can .> bbox.Y[1]) .& (x_can .< bbox.X[2]) .& (y_can .< bbox.Y[2]) - start_can = findfirst(ind) - stop_can = findlast(ind) - end - - if ground && canopy - # take maximum extent between ground and canopy - if isnothing(start_grd) && isnothing(start_can) - start = start_grd - stop = stop_grd - else - start = min(start_grd, start_can) - stop
= max(stop_grd, stop_can) - end - - elseif ground - start = start_grd - stop = stop_grd - elseif canopy - start = start_can - stop = stop_can + x = read_dataset(group, "lon_lowestmode")::Vector{Float64} + y = read_dataset(group, "lat_lowestmode")::Vector{Float64} + else + x = read_dataset(group, "lon_highestreturn")::Vector{Float64} + y = read_dataset(group, "lat_highestreturn")::Vector{Float64} end + ind = (x .> bbox.X[1]) .& (y .> bbox.Y[1]) .& (x .< bbox.X[2]) .& (y .< bbox.Y[2]) + start = findfirst(ind) + stop = findlast(ind) if isnothing(start) # no data found @@ -121,85 +103,38 @@ function points( power = occursin("Full power", read_attribute(group, "description")::String) - if canopy - nt_canopy = ( - longitude = Float32[], - latitude = Float32[], - height = Float32[], - height_error = Float32[], - datetime = Float64[], - intensity = Float32[], - sensitivity = Float32[], - surface = Bool[], - quality = Bool[], - nmodes = UInt8[], - track = Fill(track, 0), - strong_beam = Fill(power, 0), - classification = Fill("high_canopy", 0), - sun_angle = Float32[], - height_reference = Float32[], - ) - end - - if ground - nt_ground = ( - longitude = Float32[], - latitude = Float32[], - height = Float32[], - height_error = Float32[], - datetime = Float64[], - intensity = Float32[], - sensitivity = Float32[], - surface = Bool[], - quality = Bool[], - nmodes = UInt8[], - track = Fill(track, 0), - strong_beam = Fill(power, 0), - classification = Fill("ground", 0), - sun_angle = Float32[], - height_reference = Float32[], - ) - end - - if canopy && ground - return nt_canopy, nt_ground - elseif canopy - return (nt_canopy,) - elseif ground - return (nt_ground,) - else - return () - end + nt = ( + longitude = Float32[], + latitude = Float32[], + height = Float32[], + height_error = Float32[], + datetime = Float64[], + intensity = Float32[], + sensitivity = Float32[], + surface = BitVector(), + quality = BitVector(), + nmodes = UInt8[], + track = Fill(track, 0), + strong_beam = Fill(power, 0), + classification = Fill(canopy ? "high_canopy" : "ground", 0), + sun_angle = Float32[], + height_reference = Float32[], + ) + return nt end - # subset x/y_grd and x/y_can - if ground && canopy - x_grd = x_grd[start:step:stop] - y_grd = y_grd[start:step:stop] - - x_can = x_can[start:step:stop] - y_can = y_can[start:step:stop] - - elseif ground - x_grd = x_grd[start:step:stop] - y_grd = y_grd[start:step:stop] - - elseif canopy - x_can = x_can[start:step:stop] - y_can = y_can[start:step:stop] - end + x = x[start:step:stop] + y = y[start:step:stop] else start = 1 stop = length(open_dataset(group, "lon_highestreturn")) if ground - x_grd = open_dataset(group, "lon_lowestmode")[start:step:stop]::Vector{Float64} - y_grd = open_dataset(group, "lat_lowestmode")[start:step:stop]::Vector{Float64} - end - - if canopy - x_can = open_dataset(group, "lon_highestreturn")[start:step:stop]::Vector{Float64} - y_can = open_dataset(group, "lat_highestreturn")[start:step:stop]::Vector{Float64} + x = open_dataset(group, "lon_lowestmode")[start:step:stop]::Vector{Float64} + y = open_dataset(group, "lat_lowestmode")[start:step:stop]::Vector{Float64} + else + x = open_dataset(group, "lon_highestreturn")[start:step:stop]::Vector{Float64} + y = open_dataset(group, "lat_highestreturn")[start:step:stop]::Vector{Float64} end end @@ -211,14 +146,11 @@ function points( aid = open_dataset(group, "selected_algorithm")[start:step:stop]::Vector{UInt8} if canopy - height_can = open_dataset(group, "elev_highestreturn")[start:step:stop]::Vector{Float32} - height_can[(height_can.<-1000.0).&(height_can.>25000.0)] .= NaN - end - - if ground - height_grd = open_dataset(group, "elev_lowestmode")[start:step:stop]::Vector{Float32} - height_grd[(height_grd.<-1000.0).&(height_grd.>25000.0)] .= NaN + height = open_dataset(group, "elev_highestreturn")[start:step:stop]::Vector{Float32} + else + height = open_dataset(group, "elev_lowestmode")[start:step:stop]::Vector{Float32} end + height[(height.<-1000.0).|(height.>25000.0)] .= NaN datetime = open_dataset(group, "delta_time")[start:step:stop]::Vector{Float64} # Quality @@ -265,57 +197,25 @@ function points( end datetime = unix2datetime.(datetime .+ t_offset) - if canopy - nt_canopy = ( - longitude = x_can[m], - latitude = y_can[m], - height = height_can[m], - height_error = height_error[m], - datetime = datetime[m], - intensity = intensity[m], - sensitivity = sensitivity[m], - surface = Bool.(surface_flag[m]), - quality = Bool.(quality[m]), - nmodes = nmodes[m], - track = Fill(track, sum(m)), - strong_beam = Fill(power, sum(m)), - classification = Fill("high_canopy", sum(m)), - sun_angle = sun_angle[m], - height_reference = height_reference[m], - # range = zarange[m] - ) - end - - if ground - nt_ground = ( - longitude = x_grd[m], - latitude = y_grd[m], - height = height_grd[m], - height_error = height_error[m], - datetime = datetime[m], - intensity = intensity[m], - sensitivity = sensitivity[m], - surface = Bool.(surface_flag[m]), - quality = Bool.(quality[m]), - nmodes = nmodes[m], - track = Fill(track, sum(m)), - strong_beam = Fill(power, sum(m)), - classification = Fill("ground", sum(m)), - sun_angle = sun_angle[m], - height_reference = height_reference[m], - # range = zarange[m] - ) - end - - if canopy && ground - nt_canopy, nt_ground - elseif canopy - (nt_canopy,) - elseif ground - (nt_ground,) - else - () - end + nt = ( + longitude = x[m], + latitude = y[m], + height = height[m], + height_error = height_error[m], + datetime = datetime[m], + intensity = intensity[m], + sensitivity = sensitivity[m], + surface =
Bool.(surface_flag[m]), + quality = Bool.(quality[m]), + nmodes = nmodes[m], + track = Fill(track, sum(m)), + strong_beam = Fill(power, sum(m)), + classification = Fill(canopy ? "high_canopy" : "ground", sum(m)), + sun_angle = sun_angle[m], + height_reference = height_reference[m], + # range = zarange[m] + ) + nt end @@ -336,19 +236,24 @@ function lines( :points, ) end - nts = Vector{NamedTuple}() - HDF5.h5open(granule.url, "r") do file - for track in tracks - if haskey(file, track) - for track_df ∈ points(granule, file, track, step, bbox, ground, canopy, filtered) - line = Line(track_df.longitude, track_df.latitude, Float64.(track_df.height)) - nt = (geom = line, track = track, strong_beam = track_df.strong_beam[1], granule = granule.id) - push!(nts, nt) - end - end + nts = HDF5.h5open(granule.url, "r") do file + + ftracks = filter(track -> haskey(file, track), tracks) + if ground && canopy + grounds = (Bool(i % 2) for i = 1:length(ftracks)*2) + ftracks = repeat(collect(ftracks), inner = 2) + elseif ground || canopy + grounds = Base.Iterators.repeated(ground, length(ftracks)) + else + throw(ArgumentError("Choose at least one of `ground` or `canopy`")) + end + map(Tuple(zip(ftracks, grounds))) do (track, ground) + track_df = points(granule, file, track, step, bbox, ground, canopy, filtered) + line = Line(track_df.longitude, track_df.latitude, Float64.(track_df.height)) + (; geom = line, track = track, strong_beam = track_df.strong_beam[1], granule = granule.id) end end - nts + PartitionedTable(nts, granule) end """ diff --git a/src/ICESat-2/ATL03.jl b/src/ICESat-2/ATL03.jl index 386be76..739e962 100644 --- a/src/ICESat-2/ATL03.jl +++ b/src/ICESat-2/ATL03.jl @@ -37,21 +37,18 @@ function points( :points, ) end - nts = Vector{NamedTuple}() - HDF5.h5open(granule.url, "r") do file + nts = HDF5.h5open(granule.url, "r") do file t_offset = open_dataset(file, "ancillary_data/atlas_sdp_gps_epoch")[1]::Float64 + gps_offset - - for track ∈ tracks - if haskey(file, track) && haskey(open_group(file, track), "heights") - track_nt = points(granule, file, track, t_offset, step, bbox) - if !isempty(track_nt.height) - track_nt.height[track_nt.height.==fill_value] .= NaN - end - push!(nts, track_nt) + ftracks = filter(track -> haskey(file, track) && haskey(open_group(file, track), "heights"), tracks) + map(ftracks) do track + track_nt = points(granule, file, track, t_offset, step, bbox) + if !isempty(track_nt.height) + track_nt.height[track_nt.height.==fill_value] .= NaN end + track_nt end end - return nts + return PartitionedTable(nts, granule) end function lines( @@ -68,29 +65,25 @@ function lines( :points, ) end - nts = Vector{NamedTuple}() - HDF5.h5open(granule.url, "r") do file + nts = HDF5.h5open(granule.url, "r") do file t_offset = open_dataset(file, "ancillary_data/atlas_sdp_gps_epoch")[1]::Float64 + gps_offset - for track ∈ tracks - if haskey(file, track) && haskey(open_group(file, track), "heights") - - track_df = points(granule, file, track, t_offset, step, bbox) - line = Line(track_df.longitude, track_df.latitude, Float64.(track_df.height)) - i = div(length(track_df.datetime), 2) + 1 - nt = ( - geom = line, - sun_angle = Float64(track_df.sun_angle[i]), - track = track, - strong_beam = track_df.strong_beam[i], - t = track_df.datetime[i], - granule = granule.id, - ) - push!(nts, nt) - end + ftracks = filter(track -> haskey(file, track) && haskey(open_group(file, track), "heights"), tracks) + map(ftracks) do track + track_df = points(granule, file, track, t_offset, step, bbox) + line = 
Line(track_df.longitude, track_df.latitude, Float64.(track_df.height)) + i = div(length(track_df.datetime), 2) + 1 + (; + geom = line, + sun_angle = Float64(track_df.sun_angle[i]), + track = track, + strong_beam = track_df.strong_beam[i], + t = track_df.datetime[i], + granule = granule.id, + ) end end - nts + PartitionedTable(nts, granule) end function points( @@ -211,32 +204,30 @@ function classify( atl08 = convert(:ATL08, granule) end - dfs = Vector{NamedTuple}() - HDF5.h5open(granule.url, "r") do file + nts = HDF5.h5open(granule.url, "r") do file t_offset = open_dataset(file, "ancillary_data/atlas_sdp_gps_epoch")[1]::Float64 + gps_offset - for track ∈ tracks - if haskey(file, track) && haskey(open_group(file, track), "heights") - track_df = points(granule, file, track, t_offset) + ftracks = filter(track -> haskey(file, track) && haskey(open_group(file, track), "heights"), tracks) + + map(ftracks) do track + track_df = points(granule, file, track, t_offset) - mapping = atl03_mapping(atl08, track) + mapping = atl03_mapping(atl08, track) - unique_segments = unique(mapping.segment) - index_map = create_mapping(track_df.segment, unique_segments) + unique_segments = unique(mapping.segment) + index_map = create_mapping(track_df.segment, unique_segments) - class = CategoricalArray{String,1,Int8}(fill("unclassified", length(track_df.longitude))) - for i = 1:length(mapping.segment) - index = get(index_map, mapping.segment[i], nothing) - isnothing(index) && continue - offset = mapping.index[i] - 1 - class[index+offset] = classification[mapping.classification[i]+1] - end - track_dfc = merge(track_df, (classification = class,)) - push!(dfs, track_dfc) + class = CategoricalArray{String,1,Int8}(fill("unclassified", length(track_df.longitude))) + for i = 1:length(mapping.segment) + index = get(index_map, mapping.segment[i], nothing) + isnothing(index) && continue + offset = mapping.index[i] - 1 + class[index+offset] = classification[mapping.classification[i]+1] end + merge(track_df, (classification = class,)) end end - dfs + PartitionedTable(nts, granule) end diff --git a/src/ICESat-2/ATL06.jl b/src/ICESat-2/ATL06.jl index 9400e82..9156730 100644 --- a/src/ICESat-2/ATL06.jl +++ b/src/ICESat-2/ATL06.jl @@ -35,21 +35,18 @@ function points( :points, ) end - nts = Vector{NamedTuple}() - HDF5.h5open(granule.url, "r") do file + nts = HDF5.h5open(granule.url, "r") do file t_offset = open_dataset(file, "ancillary_data/atlas_sdp_gps_epoch")[1]::Float64 + gps_offset - for track ∈ tracks - if haskey(file, track) && haskey(open_group(file, track), "land_ice_segments") - - track_nt = points(granule, file, track, t_offset, step, bbox) - if !isempty(track_nt.height) - track_nt.height[track_nt.height.==fill_value] .= NaN - end - push!(nts, track_nt) + ftracks = filter(track -> haskey(file, track) && haskey(open_group(file, track), "land_ice_segments"), tracks) + map(ftracks) do track + track_nt = points(granule, file, track, t_offset, step, bbox) + if !isempty(track_nt.height) + track_nt.height[track_nt.height.==fill_value] .= NaN end + track_nt end end - return nts + return PartitionedTable(nts, granule) end function points( @@ -82,9 +79,9 @@ function points( longitude = Float64[], latitude = Float64[], height = Float32[], - height_error = Float64[], + height_error = Float32[], datetime = Dates.DateTime[], - quality = Bool[], + quality = BitVector(), track = Fill(track, 0), strong_beam = Fill(atlas_beam_type == "strong", 0), detector_id = Fill(parse(Int8, spot_number), 0), diff --git a/src/ICESat-2/ATL08.jl 
b/src/ICESat-2/ATL08.jl index 931f9ab..afb2739 100644 --- a/src/ICESat-2/ATL08.jl +++ b/src/ICESat-2/ATL08.jl @@ -52,21 +52,28 @@ function points( :points, ) end - dfs = Vector{NamedTuple}() - HDF5.h5open(granule.url, "r") do file + nts = HDF5.h5open(granule.url, "r") do file t_offset = open_dataset(file, "ancillary_data/atlas_sdp_gps_epoch")[1]::Float64 + gps_offset + f = highres ? _extrapoints : points - for track in tracks - if haskey(file, track) && haskey(open_group(file, track), "land_segments") - f = highres ? _extrapoints : points - for track_nt in f(granule, file, track, t_offset, step, canopy, canopy_field, ground, ground_field, bbox) - replace!(x -> x === fill_value ? NaN : x, track_nt.height) - push!(dfs, track_nt) - end - end + # Determine number of loops over tracks and ground and/or canopy + ftracks = filter(track -> haskey(file, track) && haskey(open_group(file, track), "land_segments"), tracks) + if ground && canopy + grounds = (Bool(i % 2) for i = 1:length(ftracks)*2) + ftracks = repeat(collect(ftracks), inner = 2) + elseif ground || canopy + grounds = Base.Iterators.repeated(ground, length(ftracks)) + else + throw(ArgumentError("Choose at least one of `ground` or `canopy`")) + end + + map(Tuple(zip(ftracks, grounds))) do (track, ground) + track_nt = f(granule, file, track, t_offset, step, !ground, canopy_field, ground, ground_field, bbox) + replace!(x -> x === fill_value ? NaN : x, track_nt.height) + track_nt end end - dfs + return PartitionedTable(nts, granule) end @@ -105,20 +112,19 @@ function points( height = Float32[], height_error = Float64[], datetime = DateTime[], - quality = Bool[], - phr = Bool[], + quality = BitVector(), + phr = BitVector(), sensitivity = Float32[], - scattered = Int16[], - saturated = Int16[], - clouds = Bool[], + scattered = Int8[], + saturated = Int8[], + clouds = BitVector(), track = Fill(track, 0), strong_beam = Fill(atlas_beam_type == "strong", 0), classification = Fill("ground", 0), height_reference = Float32[], detector_id = Fill(parse(Int8, spot_number), 0), ) - - return (nt,) + return nt end # only include x and y data within bbox @@ -132,12 +138,12 @@ function points( end if ground - zt = open_dataset(group, "land_segments/terrain/h_te_mean")[start:step:stop]::Vector{Float32} - tu = open_dataset(group, "land_segments/terrain/h_te_uncertainty")[start:step:stop]::Vector{Float32} + h = open_dataset(group, "land_segments/terrain/h_te_mean")[start:step:stop]::Vector{Float32} + he = open_dataset(group, "land_segments/terrain/h_te_uncertainty")[start:step:stop]::Vector{Float32} end if canopy - zc = open_dataset(group, "land_segments/canopy/h_mean_canopy_abs")[start:step:stop]::Vector{Float32} - cu = open_dataset(group, "land_segments/canopy/h_canopy_uncertainty")[start:step:stop]::Vector{Float32} + h = open_dataset(group, "land_segments/canopy/h_mean_canopy_abs")[start:step:stop]::Vector{Float32} + he = open_dataset(group, "land_segments/canopy/h_canopy_uncertainty")[start:step:stop]::Vector{Float32} end x = open_dataset(group, "land_segments/longitude")[start:step:stop]::Vector{Float32} y = open_dataset(group, "land_segments/latitude")[start:step:stop]::Vector{Float32} @@ -153,83 +159,54 @@ function points( atlas_beam_type = read_attribute(group, "atlas_beam_type")::String spot_number = read_attribute(group, "atlas_spot_number")::String - if ground - gt = ( - longitude = x, - latitude = y, - height = zt, - height_error = tu, - datetime = times, - quality = .!Bool.(q), - phr = Bool.(phr), - sensitivity = sensitivity, - scattered = 
scattered, - saturated = saturated, - clouds = Bool.(clouds), - track = Fill(track, length(times)), - strong_beam = Fill(atlas_beam_type == "strong", length(times)), - classification = Fill("ground", length(times)), - height_reference = dem, - detector_id = Fill(parse(Int8, spot_number), length(times)), - ) - end - if canopy - ct = ( - longitude = x, - latitude = y, - height = zc, - height_error = cu, - datetime = times, - quality = .!Bool.(q), - phr = Bool.(phr), - sensitivity = sensitivity, - scattered = Int16.(scattered), - saturated = Int16.(saturated), - clouds = Bool.(clouds), - track = Fill(track, length(times)), - strong_beam = Fill(atlas_beam_type == "strong", length(times)), - classification = Fill("high_canopy", length(times)), - return_number = Fill(1, length(times)), - number_of_returns = Fill(2, length(times)), - height_reference = dem, - detector_id = Fill(parse(Int8, spot_number), length(times)), - ) - end - if canopy && ground - ct, gt - elseif canopy - (ct,) - elseif ground - (gt,) - else - () - end + asr = open_dataset(group, "land_segments/asr")[start:step:stop]::Vector{Float32} + nph = open_dataset(group, "land_segments/n_seg_ph")[start:step:stop]::Vector{Int32} + + nt = (; + longitude = x, + latitude = y, + height = h, + height_error = he, + datetime = times, + quality = .!Bool.(q), + phr = Bool.(phr), + sensitivity = sensitivity, + scattered = scattered, + saturated = saturated, + clouds = Bool.(clouds), + track = Fill(track, length(times)), + strong_beam = Fill(atlas_beam_type == "strong", length(times)), + classification = Fill(canopy ? "high_canopy" : "ground", length(times)), + height_reference = dem, + detector_id = Fill(parse(Int8, spot_number), length(times)), + reflectance = asr, + nphotons = nph, + ) + nt end function lines(granule::ICESat2_Granule{:ATL08}; tracks = icesat2_tracks, step = 100, quality = 1) dfs = Vector{NamedTuple}() - HDF5.h5open(granule.url, "r") do file + nts = HDF5.h5open(granule.url, "r") do file # t_offset = read(file, "ancillary_data/atlas_sdp_gps_epoch")[1]::Float64 + gps_offset - for track ∈ tracks - if haskey(file, track) && haskey(open_group(file, track), "land_segments") - group = open_group(file, track) - height = open_dataset(group, "land_segments/terrain/h_te_mean")[1:step:end]::Array{Float32,1} - longitude = open_dataset(group, "land_segments/longitude")[1:step:end]::Array{Float32,1} - latitude = open_dataset(group, "land_segments/latitude")[1:step:end]::Array{Float32,1} - # t = open_dataset(group, "land_segments/delta_time")[1:step:end]::Array{Float64,1} - # times = unix2datetime.(t .+ t_offset) - atlas_beam_type = read_attribute(group, "atlas_beam_type")::String + ftracks = filter(track -> haskey(file, track) && haskey(open_group(file, track), "land_segments"), tracks) + map(ftracks) do track + group = open_group(file, track) + height = open_dataset(group, "land_segments/terrain/h_te_mean")[1:step:end]::Array{Float32,1} + longitude = open_dataset(group, "land_segments/longitude")[1:step:end]::Array{Float32,1} + latitude = open_dataset(group, "land_segments/latitude")[1:step:end]::Array{Float32,1} + # t = open_dataset(group, "land_segments/delta_time")[1:step:end]::Array{Float64,1} + # times = unix2datetime.(t .+ t_offset) + atlas_beam_type = read_attribute(group, "atlas_beam_type")::String - height[height.==fill_value] .= NaN - line = Line(longitude, latitude, height) - # i = div(length(t), 2) + 1 - nt = (geom = line, track = track, strong_beam = atlas_beam_type == "strong", granule = granule.id) - push!(dfs, nt) - end + 
height[height.==fill_value] .= NaN + line = Line(longitude, latitude, height) + # i = div(length(t), 2) + 1 + (geom = line, track = track, strong_beam = atlas_beam_type == "strong", granule = granule.id) end end - dfs + return PartitionedTable(nts, granule) end function atl03_mapping(granule::ICESat2_Granule{:ATL08}) @@ -275,77 +252,49 @@ function _extrapoints( ) group = open_group(file, track) if ground - zt = vec(open_dataset(group, "land_segments/terrain/h_te_best_fit_20m")[1:step:end, :])::Array{Float32} - tu = repeat(open_dataset(group, "land_segments/terrain/h_te_uncertainty")[1:step:end]::Vector{Float32}, 5) + h = vec(open_dataset(group, "land_segments/terrain/h_te_best_fit_20m")[1:step:end, :])::Array{Float32} + he = repeat(open_dataset(group, "land_segments/terrain/h_te_uncertainty")[1:step:end]::Vector{Float32}, inner = 5) q = vec(open_dataset(group, "land_segments/terrain/subset_te_flag")[1:step:end, :]::Array{Int8}) + else + h = vec(open_dataset(group, "land_segments/canopy/h_canopy_20m")[:, 1:step:end])::Array{Float32} + he = repeat(open_dataset(group, "land_segments/canopy/h_canopy_uncertainty")[1:step:end]::Vector{Float32}, inner = 5) + q = vec(open_dataset(group, "land_segments/canopy/subset_can_flag")[:, 1:step:end]::Array{Int8}) end - if canopy - zc = vec(open_dataset(group, "land_segments/canopy/h_canopy_20")[1:step:end, :])::Array{Float32} - cu = repeat(open_dataset(group, "land_segments/canopy/h_canopy_uncertainty")[1:step:end]::Vector{Float32}, 5) - q = vec(open_dataset(group, "land_segments/canopy/subset_can_flag")[1:step:end, :]::Array{Int8}) - end - x = vec(open_dataset(group, "land_segments/longitude_20m")[1:step:end, :]::Array{Float32}) - y = vec(open_dataset(group, "land_segments/latitude_20m")[1:step:end, :]::Array{Float32}) - t = repeat(open_dataset(group, "land_segments/delta_time")[1:step:end]::Vector{Float64}, 5) - sensitivity = repeat(open_dataset(group, "land_segments/snr")[1:step:end]::Vector{Float32}, 5) - clouds = repeat(open_dataset(group, "land_segments/layer_flag")[1:step:end]::Vector{Int8}, 5) - scattered = repeat(open_dataset(group, "land_segments/msw_flag")[1:step:end]::Vector{Int8}, 5) - saturated = repeat(open_dataset(group, "land_segments/sat_flag")[1:step:end]::Vector{Int8}, 5) - phr = repeat(open_dataset(group, "land_segments/ph_removal_flag")[1:step:end]::Vector{Int8}, 5) - dem = repeat(open_dataset(group, "land_segments/dem_h")[1:step:end]::Vector{Float32}, 5) + x = vec(open_dataset(group, "land_segments/longitude_20m")[:, 1:step:end]::Array{Float32}) + y = vec(open_dataset(group, "land_segments/latitude_20m")[:, 1:step:end]::Array{Float32}) + t = repeat(open_dataset(group, "land_segments/delta_time")[1:step:end]::Vector{Float64}, inner = 5) + sensitivity = repeat(open_dataset(group, "land_segments/snr")[1:step:end]::Vector{Float32}, inner = 5) + clouds = repeat(open_dataset(group, "land_segments/layer_flag")[1:step:end]::Vector{Int8}, inner = 5) + scattered = repeat(open_dataset(group, "land_segments/msw_flag")[1:step:end]::Vector{Int8}, inner = 5) + saturated = repeat(open_dataset(group, "land_segments/sat_flag")[1:step:end]::Vector{Int8}, inner = 5) + phr = repeat(open_dataset(group, "land_segments/ph_removal_flag")[1:step:end]::Vector{Int8}, inner = 5) + dem = repeat(open_dataset(group, "land_segments/dem_h")[1:step:end]::Vector{Float32}, inner = 5) times = unix2datetime.(t .+ t_offset) atlas_beam_type = read_attribute(group, "atlas_beam_type")::String spot_number = read_attribute(group, "atlas_spot_number")::String - # This is 
something to use, reduce(vcat,Fill.($x, $v)); + asr = repeat(open_dataset(group, "land_segments/asr")[1:step:end]::Vector{Float32}, inner = 5) + nph = repeat(open_dataset(group, "land_segments/n_seg_ph")[1:step:end]::Vector{Int32}, inner = 5) - if ground - gt = ( - longitude = x, - latitude = y, - height = zt, - height_error = tu, - datetime = times, - quality = q, - phr = Int16.(phr), - sensitivity = sensitivity, - scattered = Int16.(scattered), - saturated = Int16.(saturated), - clouds = Bool.(clouds), - track = Fill(track, length(times)), - strong_beam = Fill(atlas_beam_type == "strong", length(times)), - classification = Fill("ground", length(times)), - height_reference = dem, - detector_id = Fill(parse(Int8, spot_number), length(times)), - ) - end - if canopy - ct = ( - longitude = x, - latitude = y, - height = zc, - height_error = cu, - datetime = times, - quality = q, - phr = Int16.(phr), - sensitivity = sensitivity, - scattered = Int16.(scattered), - saturated = Int16.(saturated), - clouds = Bool.(clouds), - track = Fill(track, length(times)), - strong_beam = Fill(atlas_beam_type == "strong", length(times)), - classification = Fill("high_canopy", length(times)), - height_reference = dem, - detector_id = Fill(parse(Int8, spot_number), length(times)), - ) - end - if canopy && ground - ct, gt - elseif canopy - (ct,) - elseif ground - (gt,) - else - () - end + nt = ( + longitude = x, + latitude = y, + height = h, + height_error = he, + datetime = times, + quality = q, + phr = Bool.(phr), + sensitivity = sensitivity, + scattered = scattered, + saturated = saturated, + clouds = Bool.(clouds), + track = Fill(track, length(times)), + strong_beam = Fill(atlas_beam_type == "strong", length(times)), + classification = Fill(canopy ? "high_canopy" : "ground", length(times)), + height_reference = dem, + detector_id = Fill(parse(Int8, spot_number), length(times)), + reflectance = asr, + nphotons = nph, + ) + nt end diff --git a/src/ICESat-2/ATL12.jl b/src/ICESat-2/ATL12.jl index 7c60fde..fbb3e23 100644 --- a/src/ICESat-2/ATL12.jl +++ b/src/ICESat-2/ATL12.jl @@ -18,18 +18,19 @@ You can combine the output in a `DataFrame` with `reduce(vcat, DataFrame.(points want to change the default arguments or `DataFrame(g)` with the default options. """ function points(granule::ICESat2_Granule{:ATL12}, tracks = icesat2_tracks) - dfs = Vector{NamedTuple}() - HDF5.h5open(granule.url, "r") do file + nts = HDF5.h5open(granule.url, "r") do file t_offset = open_dataset(file, "ancillary_data/atlas_sdp_gps_epoch")[1] + gps_offset - for track ∈ tracks - if haskey(file, track) && haskey(open_group(file, track), "ssh_segments") && haskey(open_group(file, "$track/ssh_segments"), "heights") - track_df = points(granule, file, track, t_offset) - push!(dfs, track_df) - end + ftracks = filter( + track -> + haskey(file, track) && haskey(open_group(file, track), "ssh_segments") && haskey(open_group(file, "$track/ssh_segments"), "heights"), + tracks, + ) + map(ftracks) do track + points(granule, file, track, t_offset) end end - dfs + PartitionedTable(nts, granule) end function points( diff --git a/src/ICESat-2/ICESat-2.jl b/src/ICESat-2/ICESat-2.jl index 897ee15..05160da 100644 --- a/src/ICESat-2/ICESat-2.jl +++ b/src/ICESat-2/ICESat-2.jl @@ -33,7 +33,7 @@ end Retrieves the bounding box of the granule. !!! warning - + This opens the .h5 file, so it is slow. 
# Example @@ -159,7 +159,7 @@ const ascending_segments = [true, true, true, true, false, false, false, false, const descending_segments = [false, false, false, true, true, true, true, true, true, true, true, false, false, false] function icesat2_info(filename) - id, _ = splitext(filename) + id, _ = splitext(basename(filename)) type, datetime, track, version, revision = split(id, "_") segment = parse(Int, track[7:end]) ( diff --git a/src/ICESat/GLAH06.jl b/src/ICESat/GLAH06.jl index ac9ffca..25e7457 100644 --- a/src/ICESat/GLAH06.jl +++ b/src/ICESat/GLAH06.jl @@ -55,7 +55,7 @@ function points( quality = Bool[], height_reference = Float64[], ) - return gt + return Table(gt, granule) end # only include x and y data within bbox @@ -111,6 +111,6 @@ function points( (saturation_correction .< 3), height_reference = height_ref, ) - return gt + return Table(gt, granule) end end diff --git a/src/ICESat/GLAH14.jl b/src/ICESat/GLAH14.jl index f32bac5..2f3965d 100644 --- a/src/ICESat/GLAH14.jl +++ b/src/ICESat/GLAH14.jl @@ -63,7 +63,7 @@ function points( attitude = Int8[], saturation = Int8[], ) - return gt + return Table(gt, granule) end # only include x and y data within bbox @@ -110,7 +110,7 @@ function points( pts = Proj.proj_trans.(pipe, Proj.PJ_FWD, zip(x, y, height)) height = [x[3] for x in pts]::Vector{Float64} - gt = ( + gt = (; longitude = x, latitude = y, height = height, @@ -128,6 +128,6 @@ function points( attitude = sigma_att_flg, saturation = sat_corr_flag, ) - return gt + return Table(gt, granule) end end diff --git a/src/ICESat/ICESat.jl b/src/ICESat/ICESat.jl index 47de218..67c2c8e 100644 --- a/src/ICESat/ICESat.jl +++ b/src/ICESat/ICESat.jl @@ -46,7 +46,7 @@ function info(g::ICESat_Granule) end function icesat_info(filename) - id, _ = splitext(filename) + id, _ = splitext(basename(filename)) type, revision, orbit, cycle, track, segment, version, filetype = split(id, "_") return ( diff --git a/src/SpaceLiDAR.jl b/src/SpaceLiDAR.jl index 72d82c0..145442b 100644 --- a/src/SpaceLiDAR.jl +++ b/src/SpaceLiDAR.jl @@ -11,6 +11,7 @@ using TableOperations: joinpartitions using DataFrames using TimeZones using Extents +import DataAPI include("granule.jl") include("utils.jl") diff --git a/src/granule.jl b/src/granule.jl index ab3bf01..c62acbc 100644 --- a/src/granule.jl +++ b/src/granule.jl @@ -50,6 +50,14 @@ function _s3_download(url, fn, config = create_aws_config()) end abstract type Granule end +Base.:(==)(a::Granule, b::Granule) = a.id == b.id + +Base.show(io::IO, g::Granule) = _show(io, g) +Base.show(io::IO, ::MIME"text/plain", g::Granule) = _show(io, g) +function _show(io, g::T) where {T<:Granule} + print(io, "$T with id $(g.id)") +end + MultiPolygonType = Vector{Vector{Vector{Vector{Float64}}}} diff --git a/src/search.jl b/src/search.jl index ad301a5..e70e1c6 100644 --- a/src/search.jl +++ b/src/search.jl @@ -12,7 +12,7 @@ prefix(::Mission{:ICESat2}) = "ATL" prefix(::Mission{:GEDI}) = "GEDI" mission(::Mission{T}) where {T} = T -const url = "https://cmr.earthdata.nasa.gov/search/granules.umm_json_v1_6_4" +const earthdata_url = "https://cmr.earthdata.nasa.gov/search/granules.umm_json_v1_6_4" """ search(mission::Mission, bbox::Extent) @@ -24,14 +24,20 @@ function search( m::Mission{:GEDI}, product::Symbol = :GEDI02_A; bbox::Extent = world, + extent::Extent = world, version::Int = 2, before::Union{Nothing,DateTime} = nothing, after::Union{Nothing,DateTime} = nothing, provider::String = "LPDAAC_ECS", )::Vector{GEDI_Granule} startswith(string(product), prefix(m)) || throw(ArgumentError("Wrong 
product $product for $(mission(m)) mission.")) + if bbox != world + Base.depwarn("Use of `bbox` is deprecated, please use `extent` instead.", :search) + extent = bbox + end + granules = - earthdata_search(short_name = string(product), bounding_box = bbox, version = version, provider = provider, before = before, after = after) + earthdata_search(short_name = string(product), bounding_box = extent, version = version, provider = provider, before = before, after = after) length(granules) == 0 && @warn "No granules found, did you specify the correct parameters, such as version?" filter!(g -> !isnothing(g.https_url), granules) map( @@ -48,6 +54,7 @@ function search( m::Mission{:ICESat2}, product::Symbol = :ATL03; bbox::Extent = world, + extent::Extent = world, version::Int = 6, before::Union{Nothing,DateTime} = nothing, after::Union{Nothing,DateTime} = nothing, @@ -55,8 +62,13 @@ function search( provider::String = s3 ? "NSIDC_CPRD" : "NSIDC_ECS", )::Vector{ICESat2_Granule} startswith(string(product), prefix(m)) || throw(ArgumentError("Wrong product $product for $(mission(m)) mission.")) + if bbox != world + Base.depwarn("Use of `bbox` is deprecated, please use `extent` instead.", :search) + extent = bbox + end + granules = - earthdata_search(short_name = string(product), bounding_box = bbox, version = version, provider = provider, before = before, after = after) + earthdata_search(short_name = string(product), bounding_box = extent, version = version, provider = provider, before = before, after = after) length(granules) == 0 && @warn "No granules found, did you specify the correct parameters, such as version?" s3 ? filter!(g -> !isnothing(g.s3_url), granules) : filter!(g -> !isnothing(g.https_url), granules) map( @@ -73,6 +85,7 @@ function search( m::Mission{:ICESat}, product::Symbol = :GLAH14; bbox::Extent = world, + extent::Extent = world, version::Int = 34, before::Union{Nothing,DateTime} = nothing, after::Union{Nothing,DateTime} = nothing, @@ -80,8 +93,13 @@ function search( provider::String = s3 ? "NSIDC_CPRD" : "NSIDC_ECS", )::Vector{ICESat_Granule} startswith(string(product), prefix(m)) || throw(ArgumentError("Wrong product $product for $(mission(m)) mission.")) + if bbox != world + Base.depwarn("Use of `bbox` is deprecated, please use `extent` instead.", :search) + extent = bbox + end + granules = - earthdata_search(short_name = string(product), bounding_box = bbox, version = version, provider = provider, before = before, after = after) + earthdata_search(short_name = string(product), bounding_box = extent, version = version, provider = provider, before = before, after = after) length(granules) == 0 && @warn "No granules found, did you specify the correct parameters, such as version?" s3 ? filter!(g -> !isnothing(g.s3_url), granules) : filter!(g -> !isnothing(g.https_url), granules) map( @@ -209,7 +227,7 @@ function earthdata_search(; q["temporal"] = "$(isnothing(after) ? "" : after),$(isnothing(before) ? "" : before)" end - qurl = umm ? url : replace(url, "umm_json_v1_6_4" => "json") + qurl = umm ? earthdata_url : replace(earthdata_url, "umm_json_v1_6_4" => "json") r = HTTP.get(qurl, query = q, verbose = verbose, status_exception = false) HTTP.iserror(r) && error(parse_cmr_error(r)) parsef = umm ? 
parse_cmr_ummjson : parse_cmr_json diff --git a/src/table.jl b/src/table.jl index e0049cf..a0798b0 100644 --- a/src/table.jl +++ b/src/table.jl @@ -1,12 +1,117 @@ +abstract type AbstractTable end +struct Table{K,V,G} <: AbstractTable + table::NamedTuple{K,V} + granule::G + function Table(table::NamedTuple{K,V}, g::G) where {K,V,G} + new{K,typeof(values(table)),G}(table, g) + end +end +_table(t::Table) = getfield(t, :table) +_granule(t::AbstractTable) = getfield(t, :granule) +Base.size(table::Table) = (length(_table(table)),) +Base.getindex(t::Table, i) = _table(t)[i] +Base.show(io::IO, t::Table) = _show(io, t) +Base.show(io::IO, ::MIME"text/plain", t::Table) = _show(io, t) +Base.haskey(table::Table, x) = haskey(_table(table), x) +Base.keys(table::Table) = keys(_table(table)) +Base.values(table::Table) = values(_table(table)) +Base.length(table::Table) = length(_table(table)) +Base.iterate(table::Table, args...) = iterate(_table(table), args...) +Base.merge(table::Table, others...) = Table(merge(_table(table), others...), _granule(table)) +Base.parent(table::Table) = _table(table) + +function Base.getproperty(table::Table, key::Symbol) + getproperty(_table(table), key) +end + +function _show(io, t::Table) + print(io, "Table of $(_granule(t))") +end + +struct PartitionedTable{N,K,V,G} <: AbstractTable + tables::NTuple{N,NamedTuple{K,V}} + granule::G +end +PartitionedTable(t::NamedTuple, g) = PartitionedTable((t,), g) +Base.size(t::PartitionedTable) = (length(t.tables),) +Base.length(t::PartitionedTable{N}) where {N} = N +Base.getindex(t::PartitionedTable, i) = t.tables[i] +Base.lastindex(t::PartitionedTable{N}) where {N} = N +Base.show(io::IO, t::PartitionedTable) = _show(io, t) +Base.show(io::IO, ::MIME"text/plain", t::PartitionedTable) = _show(io, t) +Base.iterate(table::PartitionedTable, args...) = iterate(table.tables, args...) +Base.merge(table::PartitionedTable, others...) = PartitionedTable(merge.(table.tables, Ref.(others)...), table.granule) +Base.parent(table::PartitionedTable) = collect(table.tables) + +function _show(io, t::PartitionedTable) + print(io, "Table with $(length(t.tables)) partitions of $(_granule(t))") +end + +function add_info(table::PartitionedTable) + it = info(table.granule) + nts = map(table.tables) do t + nt = NamedTuple(zip(keys(it), Fill.(values(it), length(first(t))))) + merge(t, nt) + end + return PartitionedTable(nts, table.granule) +end + +function add_id(table::PartitionedTable) + nts = map(table.tables) do t + nt = (; id = Fill(table.granule.id, length(first(t)))) + merge(t, nt) + end + return PartitionedTable(nts, table.granule) +end + +function add_id(table::Table) + g = _granule(table) + t = _table(table) + nt = (; id = Fill(g.id, length(first(t)))) + nts = merge(t, nt) + return Table(nts, g) +end + +function add_info(table::Table) + g = _granule(table) + it = info(g) + t = _table(table) + nt = NamedTuple(zip(keys(it), Fill.(values(it), length(first(t))))) + nts = merge(t, nt) + return Table(nts, g) +end + +_info(g::Granule) = merge((; id = g.id), info(g)) + +DataAPI.metadatasupport(::Type{<:AbstractTable}) = (read = true, write = false) +DataAPI.metadatakeys(t::AbstractTable) = map(String, keys(pairs(_info(_granule(t))))) +function DataAPI.metadata(t::AbstractTable, k; style::Bool = false) + if style + getfield(_info(_granule(t)), Symbol(k)), :default + else + getfield(_info(_granule(t)), Symbol(k)) + end +end + Tables.istable(::Type{<:SpaceLiDAR.Granule}) = true Tables.columnaccess(::Type{<:SpaceLiDAR.Granule}) = true Tables.partitions(g::SpaceLiDAR.Granule) = points(g) Tables.columns(g::SpaceLiDAR.Granule) = Tables.CopiedColumns(joinpartitions(g)) +Tables.istable(::Type{<:SpaceLiDAR.PartitionedTable}) = true +Tables.columnaccess(::Type{<:SpaceLiDAR.PartitionedTable}) = true +Tables.partitions(g::SpaceLiDAR.PartitionedTable) = getfield(g, :tables) +Tables.columns(g::SpaceLiDAR.PartitionedTable) = Tables.CopiedColumns(joinpartitions(g)) + # ICESat has no beams, so no need for partitions Tables.istable(::Type{<:SpaceLiDAR.ICESat_Granule}) = true Tables.columnaccess(::Type{<:SpaceLiDAR.ICESat_Granule}) = true -Tables.columns(g::SpaceLiDAR.ICESat_Granule) = points(g) +Tables.columns(g::SpaceLiDAR.ICESat_Granule) = getfield(points(g), :table) + +Tables.istable(::Type{<:SpaceLiDAR.Table}) = true +Tables.columnaccess(::Type{<:SpaceLiDAR.Table}) = true +Tables.columns(g::SpaceLiDAR.Table) = getfield(g, :table) + function materialize!(df::DataFrame) for (name, col) in zip(names(df), eachcol(df)) diff --git a/src/utils.jl b/src/utils.jl index 96c6f1d..ce90e59 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -2,12 +2,12 @@ using Printf using DataFrames """ - granule_from_file(filename::AbstractString) + granule(filename::AbstractString) Create a mission specific granule from a local .h5 filepath. For folder usage see -[`granules_from_folder`](@ref). +[`granules`](@ref). """ -function granule_from_file(filename::AbstractString) +function granule(filename::AbstractString) _, ext = splitext(filename) lowercase(ext) != ".h5" && error("Granule must be a .h5 file") @@ -25,18 +25,20 @@ function granule_from_file(filename::AbstractString) error("Unknown granule.") end end +@deprecate granule_from_file(filename::AbstractString) granule(filename::AbstractString) """ - granules_from_folder(foldername::AbstractString) + granules(foldername::AbstractString) -Create mission specific granules from a folder with .h5 files, using [`granule_from_file`](@ref).
+Create mission specific granules from a folder with .h5 files, using [`granule`](@ref). """ -function granules_from_folder(foldername::AbstractString) +function granules(foldername::AbstractString) return [ - granule_from_file(joinpath(foldername, file)) for + granule(joinpath(foldername, file)) for file in readdir(foldername) if lowercase(splitext(file)[end]) == ".h5" ] end +@deprecate granules_from_folder(foldername::AbstractString) granules(foldername::AbstractString) """ instantiate(granules::Vector{::Granule}, folder::AbstractString) @@ -91,14 +93,24 @@ function in_bbox(g::Vector{G}, bbox::NamedTuple{(:min_x, :min_y, :max_x, :max_y) g[m] end -function write_granule_urls!(fn::String, granules::Vector{<:Granule}) +url(g::Granule) = g.url +urls(g::Vector{<:Granule}) = getfield.(g, :url) + +""" + write_urls(fn::String, granules::Vector{<:Granule}) + +Write all granule urls to a file. +""" +function write_urls(fn::String, granules::Vector{<:Granule}) open(fn, "w") do f for granule in granules - println(f, granule.url) + println(f, url(granule)) end end abspath(fn) end +@deprecate write_granule_urls! write_urls + """ isvalid(g::Granule) @@ -108,7 +120,7 @@ Checks if a granule is has a valid, local and non-corrupt .h5 file. Can be combi """ function isvalid(granule::Granule) try - HDF5.h5open(granule.url, "r") do file + HDF5.h5open(url(granule), "r") do file keys(file) end return true diff --git a/test/runtests.jl b/test/runtests.jl index a81cc95..714b678 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,6 +11,7 @@ using Documenter using Extents using GeoInterface using CategoricalArrays +using DataAPI const rng = MersenneTwister(54321) const SL = SpaceLiDAR @@ -60,48 +61,71 @@ empty_bbox = (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0) end @testset "search" begin - @test length(find(:ICESat, "GLAH06", (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0))) > 0 - @test length(find(:ICESat, "GLAH14", (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0))) > 0 - @test length(find(:ICESat2, "ATL03", (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0))) > 0 - @test length(find(:ICESat2, "ATL06", (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0))) > 0 - @test length(find(:ICESat2, "ATL08", (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0))) > 0 - granules = find(:GEDI, "GEDI02_A", (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0)) + bbox = (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0) + ex = convert(Extent, bbox) + + # Deprecation + @test length(find(:ICESat, "GLAH06", bbox)) > 0 + @test length(search(:ICESat, :GLAH06, bbox = ex)) > 0 + + @test length(search(:ICESat, :GLAH06, extent = ex)) > 0 + @test length(search(:ICESat, :GLAH14, extent = ex)) > 0 + @test length(search(:ICESat2, :ATL03, extent = ex)) > 0 + @test length(search(:ICESat2, :ATL06, extent = ex)) > 0 + @test length(search(:ICESat2, :ATL08, extent = ex)) > 0 + granules = search(:GEDI, :GEDI02_A, extent = ex) @test length(granules) > 0 @test length(granules[1].polygons) > 0 @test_throws ArgumentError find(:ICESat2, "GLAH14") @test_throws ArgumentError find(:Foo, "GLAH14") + # Time @test length(SpaceLiDAR.search(:ICESat2, :ATL08, after = DateTime(2019, 12, 12), before = DateTime(2019, 12, 13))) == 161 @test length(SpaceLiDAR.search(:ICESat2, :ATL08, before = now() - Year(5))) == 0 @test length(SpaceLiDAR.search(:ICESat2, :ATL08, after = now())) == 0 @test_throws ErrorException SpaceLiDAR.search(:ICESat2, :ATL08, after = now() - Month(47), before = now() - Month(48)) end - @testset
"download" begin - if "EARTHDATA_USER" in keys(ENV) - @info "Setting up Earthdata credentials for Github Actions" - SpaceLiDAR.netrc!( - get(ENV, "EARTHDATA_USER", ""), - get(ENV, "EARTHDATA_PW", ""), - ) - end - granules = search(:ICESat, :GLAH06, bbox = convert(Extent, (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0))) - g = granules[1] - SL.download!(g) - @test isfile(g) - rm(g) - - # This only works on us-west-2 region in AWS - # granules = search(:ICESat2, :ATL08, bbox = convert(Extent, (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0)), s3 = true) - # g = granules[1] - # SL.download!(g) - # @test isfile(g) - # rm(g) - end + # @testset "download" begin + # if "EARTHDATA_USER" in keys(ENV) + # @info "Setting up Earthdata credentials for Github Actions" + # SpaceLiDAR.netrc!( + # get(ENV, "EARTHDATA_USER", ""), + # get(ENV, "EARTHDATA_PW", ""), + # ) + # end + # granules = search(:ICESat, :GLAH06, bbox = convert(Extent, (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0))) + # g = granules[1] + + # try + # SL.download!(g) + # @test isfile(g) + # catch e + # if e isa Downloads.RequestError + # @error "Could not download granule due to network error(s)" + # else + # rethrow(e) + # end + # end + # rm(g) + + # # This only works on us-west-2 region in AWS + # # granules = search(:ICESat2, :ATL08, bbox = convert(Extent, (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0)), s3 = true) + # # g = granules[1] + # # SL.download!(g) + # # @test isfile(g) + # # rm(g) + # end @testset "granules" begin - gs = SL.granules_from_folder("data") + og = SL.granule_from_file(GLAH06_fn) + g = SL.granule(GLAH06_fn) + @test og == g # deprecation + + ogs = SL.granules_from_folder("data") + gs = SL.granules("data") + @test ogs == gs # deprecation @test length(gs) == 7 copies = copy.(gs) @@ -111,10 +135,11 @@ empty_bbox = (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0) end @testset "GLAH06" begin - g = SL.granule_from_file(GLAH06_fn) + g = SL.granule(GLAH06_fn) bbox = (min_x = 131.0, min_y = -40, max_x = 132, max_y = -30) points = SL.points(g; bbox = bbox) + @test points isa SL.AbstractTable @test length(points.latitude) == 287 @test points.quality[1] == true @@ -127,9 +152,10 @@ empty_bbox = (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0) end @testset "GLAH14" begin - g = SL.granule_from_file(GLAH14_fn) + g = SL.granule(GLAH14_fn) points = SL.points(g) + @test points isa SL.AbstractTable @test length(points) == 11 bbox = (min_x = -20.0, min_y = -85, max_x = -2, max_y = 20) @@ -146,10 +172,11 @@ empty_bbox = (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0) end @testset "ATL03" begin - g = SL.granule_from_file(ATL03_fn) - g8 = SL.granule_from_file(ATL08_fn) + g = SL.granule(ATL03_fn) + g8 = SL.granule(ATL08_fn) points = SL.points(g) + @test points isa SL.AbstractTable @test length(points) == 6 @test points[1].strong_beam[1] == true @test points[1].track[1] == "gt1l" @@ -176,8 +203,9 @@ empty_bbox = (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0) end @testset "ATL06" begin - g6 = SL.granule_from_file(ATL06_fn) + g6 = SL.granule(ATL06_fn) points = SL.points(g6) + @test points isa SL.AbstractTable fpoints = SL.points(g6, step = 1000) @test length(points) == 6 @test length(fpoints) == 6 @@ -194,11 +222,12 @@ empty_bbox = (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0) end @testset "ATL08" begin - g = SL.granule_from_file(ATL08_fn) + g = SL.granule(ATL08_fn) fpoints = SL.points(g, step = 1000) @test length(fpoints) == 6 points = SL.points(g, step = 1) + @test points isa 
SL.AbstractTable @test length(points) == 6 @test length(points[1].longitude) == 998 @test points[1].longitude[356] ≈ 175.72562f0 @@ -223,9 +252,10 @@ empty_bbox = (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0) end @testset "ATL12" begin - g12 = SL.granule_from_file(ATL12_fn) + g12 = SL.granule(ATL12_fn) points = SL.points(g12) + @test points isa SL.AbstractTable @test length(points) == 6 df = reduce(vcat, DataFrame.(points)) @@ -234,9 +264,10 @@ empty_bbox = (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0) end @testset "L2A" begin - gg = SL.granule_from_file(GEDI02_fn) + gg = SL.granule(GEDI02_fn) points = SL.points(gg, step = 1000) + @test points isa SL.AbstractTable @test length(points) == 8 @test points[2].strong_beam[1] == false @test points[4].strong_beam[1] == false @@ -279,11 +310,11 @@ empty_bbox = (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0) end @testset "Angle" begin - g = SL.granule_from_file(ATL08_fn) + g = SL.granule(ATL08_fn) @test isapprox(SL.track_angle(g, 0), -1.992, atol = 1e-3) @test isapprox(SL.track_angle(g, 88), -90.0, atol = 1e-3) - gg = SL.granule_from_file(GEDI02_fn) + gg = SL.granule(GEDI02_fn) @test isapprox(SL.track_angle(gg, 0), 38.249, atol = 1e-3) @test isapprox(SL.track_angle(gg, 51.6443), 86.075, atol = 1e-3) end @@ -301,8 +332,8 @@ empty_bbox = (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0) @test pts[1][3] ≈ -0.700000000 end - @testset "Tables" begin - g3 = SL.granule_from_file(ATL03_fn) + @testset "Tables on granule" begin + g3 = SL.granule(ATL03_fn) @test Tables.istable(g3) @test Tables.columnaccess(g3) t = Tables.columntable(g3) @@ -314,15 +345,48 @@ empty_bbox = (min_x = 4.0, min_y = 40.0, max_x = 5.0, max_y = 50.0) SL.in_bbox!(df, (min_x = 0.0, min_y = 0.0, max_x = 1.0, max_y = 1.0)) @test length(df.longitude) == 0 - g14 = SL.granule_from_file(GLAH14_fn) + g14 = SL.granule(GLAH14_fn) @test Tables.istable(g14) t = Tables.columntable(g14) @test length(t.longitude) == 729117 + end + @testset "Table from points" begin + # PartionedTable + g = SL.granule(ATL08_fn) + points = SL.points(g, step = 1) + @test points isa SL.AbstractTable + @test points isa SL.PartitionedTable + + t = SL.add_info(points) + tt = SL.add_id(t) + + df = DataFrame(tt) + first(df.id) == g.id + first(df.version) == 6 + @test metadata(df) == metadata(points) + metadata(df)["id"] == g.id + metadata(df)["version"] == 6 + + # Single table + g = SL.granule(GLAH14_fn) + points = SL.points(g, step = 1) + @test points isa SL.AbstractTable + @test points isa SL.Table + + t = SL.add_info(points) + tt = SL.add_id(t) + + df = DataFrame(tt) + first(df.id) == g.id + first(df.version) == 32 + @test metadata(df) == metadata(points) + metadata(df)["id"] == g.id + metadata(df)["version"] == 32 end @testset "GeoInterface" begin - g = SL.granule_from_file(ATL08_fn) + g = SL.granule(ATL08_fn) GeoInterface.testgeometry(g) end end
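As a final illustration of the metadata behaviour tested above, a hedged sketch of the DataAPI flow introduced in `src/table.jl`; it assumes the GEDI file from the README examples is present locally:

```julia
using SpaceLiDAR
using DataFrames  # DataFrame() copies table metadata through DataAPI
import DataAPI

fn = "GEDI02_A_2019242104318_O04046_01_T02343_02_003_02_V002.h5"
g = SpaceLiDAR.granule(fn)
table = SpaceLiDAR.points(g)

# Granule information is exposed as read-only table metadata...
DataAPI.metadatakeys(table)    # "id" plus the keys of SpaceLiDAR.info(g)
DataAPI.metadata(table, "id")  # == g.id

# ...and can also be materialized into columns with the (unexported) helpers:
df = DataFrame(SpaceLiDAR.add_id(SpaceLiDAR.add_info(table)))
```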