Skip to content

Commit

Permalink
Make tile opens be fully lazy.
Browse files Browse the repository at this point in the history
Though I had previously made tile reads lazy for
forest_change_diagnostic, tiles were still be opened even when they were
not actually needed for the analysis, because of bucket accesses in
checkSources().

I moved the assertions in checkSources() to the beginning of the
fetchWindow() implementations, so getSources() no longer needs to open
the associated tiles. A tile is now only opened exactly when
fetchWindow() is called, so there are no file accesses for a particular
dataset tile unless it is actually used in the summary computation.

As an optimization, I moved code around in ForestChangeDiagnosticSummary
so that argOTBN and braBiomes tiles are only accessed for pixels in ARG
or BRA respectively. prodesLossYear is still accessed all the time, as
one extra way of deciding if a pixel is in Brazil. The test output
changes are just re-ordering of categories in the output.

I will use this full laziness for an upcoming optimization for
gfw_dashboard (where we will sometimes use the raster gadm datasets).
  • Loading branch information
danscales committed Jul 26, 2024
1 parent 477d87b commit 03615bc
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 63 deletions.
49 changes: 5 additions & 44 deletions src/main/scala/org/globalforestwatch/grids/Grid.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package org.globalforestwatch.grids
import geotrellis.raster.TileLayout
import geotrellis.layer.{LayoutDefinition, SpatialKey}
import geotrellis.vector.Extent
import org.globalforestwatch.layers.{OptionalLayer, RequiredLayer}

trait Grid[T <: GridSources] {

Expand Down Expand Up @@ -56,55 +55,17 @@ trait Grid[T <: GridSources] {
LayoutDefinition(gridExtent, tileLayout)
}

// Get the set of grid sources (subclass of GridSources) associated with specified
// grid tile and the configuration/catalog in kwargs.
def getSources(gridTile: GridTile, kwargs: Map[String, Any]): T

def checkSources(gridTile: GridTile, windowExtent: Extent, windowKey: SpatialKey, windowLayout: LayoutDefinition, kwargs: Map[String, Any]): T = {

def ccToMap(cc: AnyRef): Map[String, Any] =
cc.getClass.getDeclaredFields.foldLeft(Map.empty[String, Any]) { (a, f) =>
f.setAccessible(true)
a + (f.getName -> f.get(cc))
}

val sources: T = getSources(gridTile, kwargs)

val sourceMap = ccToMap(sources)

for ((k, v) <- sourceMap) {

v match {
case s: RequiredLayer => checkRequired(s, windowExtent)
case s: OptionalLayer => checkOptional(s, windowExtent)
case _ => Unit
}
}

sources
}

// NOTE: This check will cause an eager fetch of raster metadata
def checkRequired(layer: RequiredLayer, windowExtent: Extent): Unit = {
require(
layer.source.extent.intersects(windowExtent),
s"${layer.uri} does not intersect: $windowExtent"
)
}

// Only check these guys if they're defined
def checkOptional(layer: OptionalLayer, windowExtent: Extent): Unit = {
layer.source.foreach { source =>
require(
source.extent.intersects(windowExtent),
s"${layer.uri} does not intersect: $windowExtent"
)
}
}

// Get the set of grid sources (subclass of GridSources) associated with specified
// windowKey and windowLayout and the configuration/catalog in kwargs.
def getRasterSource(windowKey: SpatialKey, windowLayout: LayoutDefinition, kwargs: Map[String, Any]): T = {
val windowExtent: Extent = windowKey.extent(windowLayout)
val gridId = GridId.pointGridId(windowExtent.center, gridSize)
val gridTile = GridTile(gridSize, rowCount, blockSize, gridId)

checkSources(gridTile, windowExtent: Extent, windowKey: SpatialKey, windowLayout: LayoutDefinition, kwargs: Map[String, Any])
getSources(gridTile, kwargs)
}
}
15 changes: 12 additions & 3 deletions src/main/scala/org/globalforestwatch/layers/Layer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,9 @@ trait FLayer extends Layer {
trait RequiredLayer extends Layer {

/**
* Define how to read sources for required layers
* Define how to read sources for required layers. source is only evaluated (and
* so we only check the the associated tile exists) when fetchWindow is first
* called, and then source is used to fetch the relevant tile.
*/
lazy val source: GDALRasterSource = {
// Removes the expected 404 errors from console log
Expand Down Expand Up @@ -234,6 +236,7 @@ trait RequiredILayer extends RequiredLayer with ILayer {
*/
def fetchWindow(windowKey: SpatialKey,
windowLayout: LayoutDefinition): ITile = {
require(source.extent.intersects(windowKey.extent(windowLayout)))
val layoutSource = LayoutTileSource.spatial(source, windowLayout)
// println(s"Fetching required int tile ${source.dataPath.value}, key ${windowKey}")
val tile = source.synchronized {
Expand All @@ -251,6 +254,7 @@ trait RequiredDLayer extends RequiredLayer with DLayer {
*/
def fetchWindow(windowKey: SpatialKey,
windowLayout: LayoutDefinition): DTile = {
require(source.extent.intersects(windowKey.extent(windowLayout)))
val layoutSource = LayoutTileSource.spatial(source, windowLayout)
// println(s"Fetching required int tile ${source.dataPath.value}, key ${windowKey}")
val tile = source.synchronized {
Expand All @@ -268,6 +272,7 @@ trait RequiredFLayer extends RequiredLayer with FLayer {
*/
def fetchWindow(windowKey: SpatialKey,
windowLayout: LayoutDefinition): FTile = {
require(source.extent.intersects(windowKey.extent(windowLayout)))
val layoutSource = LayoutTileSource.spatial(source, windowLayout)
// println(s"Fetching required float tile ${source.dataPath.value}, key ${windowKey}")
val tile = source.synchronized {
Expand All @@ -281,9 +286,10 @@ trait RequiredFLayer extends RequiredLayer with FLayer {
trait OptionalLayer extends Layer {

/**
* Define how to read sources for optional Layers
* Define how to read sources for optional Layers. Check if URI exists before
* trying to open it, return None if no file found. source is only evaluated when
* fetchWindow is first called, and then is used to fetch the relevant tile.
*/
/** Check if URI exists before trying to open it, return None if no file found */
lazy val source: Option[GDALRasterSource] = {
// Removes the expected 404 errors from console log

Expand Down Expand Up @@ -326,6 +332,7 @@ trait OptionalILayer extends OptionalLayer with ILayer {
*/
def fetchWindow(windowKey: SpatialKey,
windowLayout: LayoutDefinition): OptionalITile = {
source.foreach(s => require(s.extent.intersects(windowKey.extent(windowLayout))))
// source.foreach(s => println(s"Fetching optional int tile ${s.dataPath.value}, key ${windowKey}"))
new OptionalITile(for {
source <- source
Expand All @@ -349,6 +356,7 @@ trait OptionalDLayer extends OptionalLayer with DLayer {
*/
def fetchWindow(windowKey: SpatialKey,
windowLayout: LayoutDefinition): OptionalDTile = {
source.foreach(s => require(s.extent.intersects(windowKey.extent(windowLayout))))
// source.foreach(s => println(s"Fetching optional double tile ${s.dataPath.value}, key ${windowKey}"))
new OptionalDTile(for {
source <- source
Expand All @@ -372,6 +380,7 @@ trait OptionalFLayer extends OptionalLayer with FLayer {
*/
def fetchWindow(windowKey: SpatialKey,
windowLayout: LayoutDefinition): OptionalFTile = {
source.foreach(s => require(s.extent.intersects(windowKey.extent(windowLayout))))
// source.foreach(s => println(s"Fetching optional float tile ${s.dataPath.value}, key ${windowKey}"))
new OptionalFTile(for {
source <- source
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ trait ErrorSummaryRDD extends LazyLogging with java.io.Serializable {
featuresGroupedWithSummaries
}

// Get the grid sources (subclass of GridSources) associated with a specified
// window key and windowLayout.
def getSources(key: SpatialKey, windowLayout: LayoutDefinition, kwargs: Map[String, Any]): Either[Throwable, SOURCES]

def readWindow(rs: SOURCES, windowKey: SpatialKey, windowLayout: LayoutDefinition): Either[Throwable, Raster[TILE]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ trait SummaryRDD extends LazyLogging with java.io.Serializable {
featuresGroupedWithSummaries
}

// Get the grid sources (subclass of GridSources) associated with a specified
// window key and windowLayout.
def getSources(key: SpatialKey, windowLayout: LayoutDefinition, kwargs: Map[String, Any]): Either[Throwable, SOURCES]

def readWindow(rs: SOURCES, windowKey: SpatialKey, windowLayout: LayoutDefinition): Either[Throwable, Raster[TILE]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,28 +87,18 @@ object ForestChangeDiagnosticSummary {
val isPeatlands: Boolean = raster.tile.isPeatlands.getData(col, row)
val isIntactForestLandscapes2000: Boolean =
raster.tile.isIntactForestLandscapes2000.getData(col, row)
val prodesLossYear: Int = {
val loss = raster.tile.prodesLossYear.getData(col, row)
if (loss != null) {
loss.toInt
} else {
0
}
}

val region: Int = raster.tile.gfwProCoverage.getData(col, row)
val argPresence = GFWProCoverage.isArgentina(region)
val colPresence = GFWProCoverage.isColombia(region)
val braBiomesPresence = GFWProCoverage.isBrazilBiomesPresence(region)
val argOTBN: String = raster.tile.argOTBN.getData(col, row)
var argOTBN: String = ""

// We compute country-specific forest loss using argForestLoss tile for
// Argentina, and prodesLossYear for Brazil. In the very unusual case where a
// location covers more than one country, we don't want to mix
// country-specific forest losses, so we record the country-code that the
// forest loss came from. We will zero out the country-specific forest loss
// and mark it with an 'ERR' country code if we end up merging results from
// more than one country.
// forest loss came from.
var countrySpecificLossYear = ApproxYear(0, false)
var countryCode = ""
var classifiedRegion = ""
Expand All @@ -118,6 +108,7 @@ object ForestChangeDiagnosticSummary {
classifiedRegion = raster.tile.colFronteraAgricola.getData(col, row)
} else if (argPresence) {
countrySpecificLossYear = raster.tile.argForestLoss.getData(col, row)
argOTBN = raster.tile.argOTBN.getData(col, row)
classifiedRegion = argOTBN
countryCode = "ARG"
} else {
Expand All @@ -133,6 +124,20 @@ object ForestChangeDiagnosticSummary {
countryCode = "BRA"
}
}
val prodesLossYear: Int = {
if (countryCode == "BRA") {
countrySpecificLossYear.year
} else {
0
}
}
val braBiomes: String = {
if (braBiomesPresence) {
raster.tile.braBiomes.getData(col, row)
} else {
""
}
}

val seAsiaLandCover: String =
raster.tile.seAsiaLandCover.getData(col, row)
Expand All @@ -142,7 +147,6 @@ object ForestChangeDiagnosticSummary {
val idnForestArea: String = raster.tile.idnForestArea.getData(col, row)
val isIdnForestMoratorium: Boolean =
raster.tile.isIDNForestMoratorium.getData(col, row)
val braBiomes: String = raster.tile.braBiomes.getData(col, row)
val isPlantation: Boolean = raster.tile.isPlantation.getData(col, row)

// compute Booleans
Expand Down
Loading

0 comments on commit 03615bc

Please sign in to comment.