From 449b7d60803da1e9dc05bd92f20cb2bb7ba71c68 Mon Sep 17 00:00:00 2001 From: Dan Scales Date: Thu, 26 Dec 2024 06:13:29 -0800 Subject: [PATCH] GTC-3077 Give NoIntersectionError in AFi if geometry too small In the AFi analysis, we currently don't return any row at all for a location whose geometry does not intersect the centroid of any pixel (mostly because the geometry is small compared to the pixel size). Add code similar to that in FCD to detect this case and return NoIntersectionError. Make the same change for GFWProDashboard as well, though we still need to check whether that is really what we want. --- .../summarystats/afi/AFiAnalysis.scala | 12 +++++++++++- .../gfwpro_dashboard/GfwProDashboardAnalysis.scala | 14 +++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index 5b7a0c6e..041de6ab 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -11,6 +11,7 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.sql.functions._ import scala.collection.immutable.SortedMap import io.circe.syntax._ +import cats.data.Validated.{Invalid, Valid} object AFiAnalysis extends SummaryAnalysis { @@ -42,13 +43,22 @@ object AFiAnalysis extends SummaryAnalysis { import spark.implicits._ + // If a location has no AFiDataGroup entries, then the geometry must not have + // intersected the centroid of any pixels, so report the location as + // NoIntersectionError. + val summary1RDD = summaryRDD.map { + case Valid(Location(fid, data)) if data.isEmpty => + Invalid(Location(fid, NoIntersectionError)) + case data => data + } + // Null out gadm_id for all non-dissolved rows and then aggregate all results for // each unique (list_id, location_id, gadm_id, loss_year). 
Need to combine first // with key including loss_year, so we don't have duplicate loss year entries // when we create the map of loss years. val summary1DF = AFiAnalysis.aggregateByLossYear( AFiDF - .getFeatureDataFrame(summaryRDD, spark) + .getFeatureDataFrame(summary1RDD, spark) .withColumn( "gadm_id", when(col("location_id") =!= -1, lit("") ).otherwise(col("gadm_id")) ) diff --git a/src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardAnalysis.scala index 54e08847..7f2b2aa9 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardAnalysis.scala @@ -18,6 +18,7 @@ import org.globalforestwatch.util.GeotrellisGeometryValidator.makeValidGeom import scala.collection.JavaConverters._ import java.time.LocalDate import org.globalforestwatch.util.IntersectGeometry +import cats.data.Validated.{Invalid, Valid} object GfwProDashboardAnalysis extends SummaryAnalysis { @@ -109,7 +110,18 @@ object GfwProDashboardAnalysis extends SummaryAnalysis { val validatedSummaryStatsRdd = GfwProDashboardRDD(tmp, GfwProDashboardGrid.blockTileGrid, kwargs + ("getRasterGadm" -> !doGadmIntersect)) - ValidatedWorkflow(validatedSummaryStatsRdd).mapValid { summaryStatsRDD => + validatedSummaryStatsRdd.collect().foreach(println) + + // If a location has no GfwProDashboardRawDataGroup entries, then the + // geometry must not have intersected the centroid of any pixels, so report + // the location as NoIntersectionError. 
+ val validatedSummaryStatsRdd1 = validatedSummaryStatsRdd.map { + case Valid(Location(fid, data)) if data.isEmpty => + Invalid(Location(fid, NoIntersectionError)) + case data => data + } + + ValidatedWorkflow(validatedSummaryStatsRdd1).mapValid { summaryStatsRDD => summaryStatsRDD .flatMap { case (CombinedFeatureId(fid@GfwProFeatureId(listId, locationId), gadmId), summary) => // For non-dissolved locations or vector gadm intersection, merge all