From 8255ad22e2efa604676940dc024bcdfd9c7c78f3 Mon Sep 17 00:00:00 2001 From: manukala6 Date: Mon, 18 Sep 2023 14:03:29 -0700 Subject: [PATCH 1/3] GTC-2570 Filter out rows not requiring analysis --- .../org/globalforestwatch/summarystats/afi/AFiAnalysis.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index 742d7cfc..45ee9635 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -48,6 +48,8 @@ object AFiAnalysis extends SummaryAnalysis { val summaryDF = AFiAnalysis.aggregateResults( AFiDF .getFeatureDataFrame(summaryRDD, spark) + .filter($"location_id" =!= -2) + .filter(!$"gadm_id".contains("null")) .withColumn( "gadm_id", when(col("location_id") =!= -1|| col("gadm_id").contains("null"), lit("") ).otherwise(col("gadm_id")) ) From 4d8e7e5d2c1f7f4eeb95d64ef712d80ce7e865da Mon Sep 17 00:00:00 2001 From: manukala6 Date: Tue, 19 Sep 2023 08:22:48 -0700 Subject: [PATCH 2/3] GTC-2570 Move filter to features df --- .../org/globalforestwatch/summarystats/afi/AFiAnalysis.scala | 2 -- .../scala/org/globalforestwatch/summarystats/afi/AFiDF.scala | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index 45ee9635..742d7cfc 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -48,8 +48,6 @@ object AFiAnalysis extends SummaryAnalysis { val summaryDF = AFiAnalysis.aggregateResults( AFiDF .getFeatureDataFrame(summaryRDD, spark) - .filter($"location_id" =!= -2) - .filter(!$"gadm_id".contains("null")) .withColumn( "gadm_id", when(col("location_id") =!= -1|| col("gadm_id").contains("null"), lit("") ).otherwise(col("gadm_id")) ) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala index a1b49ac0..cd094ae9 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala @@ -40,5 +40,7 @@ object AFiDF extends SummaryDF { } .toDF("id", "error", "dataGroup", "data") .select($"id.*", $"error.*", $"dataGroup.*", $"data.*") + .filter($"location_id" =!= -2) + .filter(!$"gadm_id".contains("null")) } } From d9391b7efe5cda91dd2a540b9a6f2c829e0db0bb Mon Sep 17 00:00:00 2001 From: manukala6 Date: Wed, 27 Sep 2023 08:40:19 -0700 Subject: [PATCH 3/3] Filter locationId before analysis --- .../summarystats/afi/AFiAnalysis.scala | 1 + .../summarystats/afi/AFiCommand.scala | 12 +++++++++--- .../globalforestwatch/summarystats/afi/AFiDF.scala | 2 -- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index 742d7cfc..0b852204 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -48,6 +48,7 @@ object AFiAnalysis extends SummaryAnalysis { val summaryDF = AFiAnalysis.aggregateResults( AFiDF .getFeatureDataFrame(summaryRDD, spark) + .filter(!$"gadm_id".contains("null")) .withColumn( "gadm_id", when(col("location_id") =!= -1|| col("gadm_id").contains("null"), lit("") ).otherwise(col("gadm_id")) ) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala index a636d0f0..af46c216 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala @@ -9,13 +9,15 @@ import org.apache.sedona.core.spatialRDD.SpatialRDD import org.globalforestwatch.config.GfwConfig import org.globalforestwatch.features._ import org.locationtech.jts.geom.Geometry +import cats.data.Validated.Valid + object AFiCommand extends SummaryCommand { val afiCommand: Opts[Unit] = Opts.subcommand( name = AFiAnalysis.name, help = "Compute summary statistics for GFW Pro Dashboard." - ) { + ) ( ( defaultOptions, featureFilterOptions, @@ -30,14 +32,18 @@ object AFiCommand extends SummaryCommand { runAnalysis { implicit spark => val featureRDD = ValidatedFeatureRDD(default.featureUris, default.featureType, featureFilter, default.splitFeatures) + val filteredFeatureRDD = featureRDD.filter{ + case Valid((GfwProFeatureId(_, locationId), _)) => locationId != -2 + case _ => true + } AFiAnalysis( - featureRDD, + filteredFeatureRDD, default.featureType, spark, kwargs ) } } - } + ) } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala index cd094ae9..a1b49ac0 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala @@ -40,7 +40,5 @@ object AFiDF extends SummaryDF { } .toDF("id", "error", "dataGroup", "data") .select($"id.*", $"error.*", $"dataGroup.*", $"data.*") - .filter($"location_id" =!= -2) - .filter(!$"gadm_id".contains("null")) } }