From 3e758ea994760c0923d5d6f59cc91705817f33f8 Mon Sep 17 00:00:00 2001 From: manukala6 Date: Fri, 23 Jun 2023 10:27:38 -0700 Subject: [PATCH 01/33] afi analysis init --- .../summarystats/afi/AFiAnalysis.scala | 170 ++++++++++++++++++ .../summarystats/afi/AFiCommand.scala | 72 ++++++++ .../summarystats/afi/AFiDF.scala | 57 ++++++ .../summarystats/afi/AFiData.scala | 59 ++++++ .../summarystats/afi/AFiDataDateCount.scala | 73 ++++++++ .../summarystats/afi/AFiExport.scala | 36 ++++ .../summarystats/afi/AFiGrid.scala | 15 ++ .../summarystats/afi/AFiGridSources.scala | 46 +++++ .../summarystats/afi/AFiRDD.scala | 49 +++++ .../summarystats/afi/AFiRawData.scala | 17 ++ .../summarystats/afi/AFiRawDataGroup.scala | 19 ++ .../summarystats/afi/AFiSummary.scala | 61 +++++++ .../summarystats/afi/AFiTile.scala | 21 +++ 13 files changed, 695 insertions(+) create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDateCount.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiExport.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiGrid.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiRDD.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala create mode 100644 
src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala new file mode 100644 index 00000000..e5b268f8 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -0,0 +1,170 @@ +package org.globalforestwatch.summarystats.gfwpro_dashboard + +import cats.data.{NonEmptyList, Validated} +import geotrellis.vector.{Feature, Geometry} +import geotrellis.store.index.zcurve.Z2 +import org.apache.spark.HashPartitioner +import org.globalforestwatch.features._ +import org.globalforestwatch.summarystats._ +import org.globalforestwatch.util.GeometryConstructor.createPoint +import org.globalforestwatch.util.{RDDAdapter, SpatialJoinRDD} +import org.globalforestwatch.util.RDDAdapter +import org.globalforestwatch.ValidatedWorkflow +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.SparkSession +import org.apache.spark.storage.StorageLevel +import org.globalforestwatch.features.FeatureId +import org.apache.sedona.sql.utils.Adapter +import org.apache.sedona.core.spatialRDD.SpatialRDD +import org.globalforestwatch.util.GeotrellisGeometryValidator.makeValidGeom + +import scala.collection.JavaConverters._ +import java.time.LocalDate +import org.globalforestwatch.util.IntersectGeometry + +import scala.reflect.ClassTag + +object GfwProDashboardAnalysis extends SummaryAnalysis { + + val name = "gfwpro_dashboard" + + def apply( + featureRDD: RDD[ValidatedLocation[Geometry]], + featureType: String, + contextualFeatureType: String, + contextualFeatureUrl: NonEmptyList[String], + fireAlertRDD: SpatialRDD[Geometry], + spark: SparkSession, + kwargs: Map[String, Any] + ): Unit = { + featureRDD.persist(StorageLevel.MEMORY_AND_DISK) + + val summaryRDD = ValidatedWorkflow(featureRDD).flatMap { rdd => + val spatialContextualDF = SpatialFeatureDF(contextualFeatureUrl, 
contextualFeatureType, FeatureFilter.empty, "geom", spark) + val spatialContextualRDD = Adapter.toSpatialRdd(spatialContextualDF, "polyshape") + val spatialFeatureRDD = RDDAdapter.toSpatialRDDfromLocationRdd(rdd, spark) + + /* Enrich the feature RDD by intersecting it with contextual features + * The resulting FeatuerId carries combined identity of source fature and contextual geometry + */ + val enrichedRDD = + SpatialJoinRDD + .flatSpatialJoin(spatialContextualRDD, spatialFeatureRDD, considerBoundaryIntersection = true, usingIndex = true) + .rdd + .flatMap { case (feature, context) => + refineContextualIntersection(feature, context, contextualFeatureType) + } + + ValidatedWorkflow(enrichedRDD) + .mapValidToValidated { rdd => + rdd.map { case row@Location(fid, geom) => + if (geom.isEmpty()) { + Validated.invalid[Location[JobError], Location[Geometry]](Location(fid, GeometryError(s"Empty Geometry"))) + } else if (!geom.isValid) { + Validated.invalid[Location[JobError], Location[Geometry]](Location(fid, GeometryError(s"Invalid Geometry"))) + } else + Validated.valid[Location[JobError], Location[Geometry]](row) + } + } + .flatMap { enrichedRDD => + val fireStatsRDD = fireStats(enrichedRDD, fireAlertRDD, spark) + val tmp = enrichedRDD.map { case Location(id, geom) => Feature(geom, id) } + val validatedSummaryStatsRdd = GfwProDashboardRDD(tmp, GfwProDashboardGrid.blockTileGrid, kwargs) + ValidatedWorkflow(validatedSummaryStatsRdd).mapValid { summaryStatsRDD => + // fold in fireStatsRDD after polygonal summary and accumulate the errors + summaryStatsRDD + .mapValues(_.toGfwProDashboardData()) + .leftOuterJoin(fireStatsRDD) + .mapValues { case (data, fire) => + data.copy(viirs_alerts_daily = fire.getOrElse(GfwProDashboardDataDateCount.empty)) + } + } + } + } + + val summaryDF = GfwProDashboardDF.getFeatureDataFrameFromVerifiedRdd(summaryRDD.unify, spark) + val runOutputUrl: String = getOutputUrl(kwargs) + GfwProDashboardExport.export(featureType, summaryDF, runOutputUrl, 
kwargs) + + } + + /** These geometries touch, apply application specific logic of how to treat that. + * - For intersection of location geometries only keep those where centroid of location is in the contextual geom (this ensures that + * any location is only assigned to a single contextual area even if it intersects more) + * - For dissolved geometry of list report all contextual areas it intersects + */ + private def refineContextualIntersection( + featureGeom: Geometry, + contextualGeom: Geometry, + contextualFeatureType: String + ): List[ValidatedLocation[Geometry]] = { + val featureId = featureGeom.getUserData.asInstanceOf[FeatureId] + val contextualId = FeatureId.fromUserData(contextualFeatureType, contextualGeom.getUserData.asInstanceOf[String], delimiter = ",") + + featureId match { + case gfwproId: GfwProFeatureId if gfwproId.locationId >= 0 => + val featureCentroid = createPoint(featureGeom.getCentroid.getX, featureGeom.getCentroid.getY) + if (contextualGeom.contains(featureCentroid)) { + val fid = CombinedFeatureId(gfwproId, contextualId) + // val gtGeom: Geometry = toGeotrellisGeometry(featureGeom) + val fixedGeom = makeValidGeom(featureGeom) + List(Validated.Valid(Location(fid, fixedGeom))) + } else Nil + + case gfwproId: GfwProFeatureId if gfwproId.locationId < 0 => + IntersectGeometry + .validatedIntersection(featureGeom, contextualGeom) + .leftMap { err => Location(featureId, err) } + .map { geometries => + geometries.map { geom => + // val gtGeom: Geometry = toGeotrellisGeometry(geom) + val fixedGeom = makeValidGeom(geom) + Location(CombinedFeatureId(gfwproId, contextualId), fixedGeom) + } + } + .traverse(identity) // turn validated list of geometries into list of validated geometries + } + } + + private def partitionByZIndex[A: ClassTag](rdd: RDD[A])(getGeom: A => Geometry): RDD[A] = { + val hashPartitioner = new HashPartitioner(rdd.getNumPartitions) + + rdd + .keyBy({ row => + val geom = getGeom(row) + Z2( + (geom.getCentroid.getX * 100).toInt, + 
(geom.getCentroid.getY * 100).toInt + ).z + }) + .partitionBy(hashPartitioner) + .mapPartitions( + { iter: Iterator[(Long, A)] => + for (i <- iter) yield i._2 + }, + preservesPartitioning = true + ) + } + + private def fireStats( + featureRDD: RDD[Location[Geometry]], + fireAlertRDD: SpatialRDD[Geometry], + spark: SparkSession + ): RDD[Location[GfwProDashboardDataDateCount]] = { + val featureSpatialRDD = RDDAdapter.toSpatialRDDfromLocationRdd(featureRDD, spark) + val joinedRDD = SpatialJoinRDD.spatialjoin(featureSpatialRDD, fireAlertRDD) + + joinedRDD.rdd + .map { case (poly, points) => + val fid = poly.getUserData.asInstanceOf[FeatureId] + val data = points.asScala.foldLeft(GfwProDashboardDataDateCount.empty) { (z, point) => + // extract year from acq_date column is YYYY-MM-DD + val acqDate = point.getUserData.asInstanceOf[String].split("\t")(2) + val alertDate = LocalDate.parse(acqDate) + z.merge(GfwProDashboardDataDateCount.fillDaily(Some(alertDate), 1)) + } + (fid, data) + } + .reduceByKey(_ merge _) + } +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala new file mode 100644 index 00000000..ee21f30f --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala @@ -0,0 +1,72 @@ +package org.globalforestwatch.summarystats.gfwpro_dashboard + +import cats.data.NonEmptyList +import org.globalforestwatch.summarystats.SummaryCommand +import cats.implicits._ +import com.monovore.decline.Opts +import geotrellis.vector.Geometry +import org.apache.sedona.core.spatialRDD.SpatialRDD +import org.globalforestwatch.config.GfwConfig +import org.globalforestwatch.features._ +import org.locationtech.jts.geom.Geometry + +object GfwProDashboardCommand extends SummaryCommand { + + val contextualFeatureUrlOpt: Opts[NonEmptyList[String]] = Opts + .options[String]( + "contextual_feature_url", + help = "URI of contextual features in TSV format" + ) + 
+ val contextualFeatureTypeOpt: Opts[String] = Opts + .option[String]( + "contextual_feature_type", + help = "Type of contextual features" + ) + + val gfwProDashboardCommand: Opts[Unit] = Opts.subcommand( + name = GfwProDashboardAnalysis.name, + help = "Compute summary statistics for GFW Pro Dashboard." + ) { + ( + defaultOptions, + optionalFireAlertOptions, + featureFilterOptions, + contextualFeatureUrlOpt, + contextualFeatureTypeOpt + ).mapN { (default, fireAlert, filterOptions, contextualFeatureUrl, contextualFeatureType) => + val kwargs = Map( + "outputUrl" -> default.outputUrl, + "noOutputPathSuffix" -> default.noOutputPathSuffix, + "overwriteOutput" -> default.overwriteOutput, + "config" -> GfwConfig.get + ) + // TODO: move building the feature object into options + val featureFilter = FeatureFilter.fromOptions(default.featureType, filterOptions) + + runAnalysis { implicit spark => + val featureRDD = ValidatedFeatureRDD(default.featureUris, default.featureType, featureFilter, default.splitFeatures) + + val fireAlertRDD = fireAlert.alertSource match { + case Some(alertSource) => + FireAlertRDD(spark, fireAlert.alertType, alertSource, FeatureFilter.empty) + case None => + // If no sources provided, just create an empty RDD + val spatialRDD = new SpatialRDD[Geometry] + spatialRDD.rawSpatialRDD = spark.sparkContext.emptyRDD[Geometry].toJavaRDD() + spatialRDD + } + + GfwProDashboardAnalysis( + featureRDD, + default.featureType, + contextualFeatureType = contextualFeatureType, + contextualFeatureUrl = contextualFeatureUrl, + fireAlertRDD, + spark, + kwargs + ) + } + } + } +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala new file mode 100644 index 00000000..3520fafa --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala @@ -0,0 +1,57 @@ +package org.globalforestwatch.summarystats.gfwpro_dashboard + +import org.apache.spark.rdd.RDD 
+import org.apache.spark.sql.{DataFrame, SparkSession} +import org.globalforestwatch.features.{CombinedFeatureId, FeatureId, GadmFeatureId, GfwProFeatureId} +import org.globalforestwatch.summarystats._ +import cats.data.Validated.{Valid, Invalid} + +object GfwProDashboardDF extends SummaryDF { + case class RowGadmId(list_id: String, location_id: String, gadm_id: String) + + def getFeatureDataFrameFromVerifiedRdd( + dataRDD: RDD[ValidatedLocation[GfwProDashboardData]], + spark: SparkSession + ): DataFrame = { + import spark.implicits._ + + val rowId: FeatureId => RowGadmId = { + case CombinedFeatureId(proId: GfwProFeatureId, gadmId: GadmFeatureId) => + RowGadmId(proId.listId, proId.locationId.toString, gadmId.toString()) + case proId: GfwProFeatureId => + RowGadmId(proId.listId, proId.locationId.toString, "none") + case _ => + throw new IllegalArgumentException("Not a CombinedFeatureId[GfwProFeatureId, GadmFeatureId]") + } + dataRDD.map { + case Valid(Location(id, data)) => + (rowId(id), SummaryDF.RowError.empty, data) + case Invalid(Location(id, err)) => + (rowId(id), SummaryDF.RowError.fromJobError(err), GfwProDashboardData.empty) + } + .toDF("id", "error", "data") + .select($"id.*", $"error.*", $"data.*") + } + + def getFeatureDataFrame( + dataRDD: RDD[(FeatureId, ValidatedRow[GfwProDashboardData])], + spark: SparkSession + ): DataFrame = { + import spark.implicits._ + + dataRDD.mapValues { + case Valid(data) => + (SummaryDF.RowError.empty, data) + case Invalid(err) => + (SummaryDF.RowError.fromJobError(err), GfwProDashboardData.empty) + }.map { + case (CombinedFeatureId(proId: GfwProFeatureId, gadmId: GadmFeatureId), (error, data)) => + val rowId = RowGadmId(proId.listId, proId.locationId.toString, gadmId.toString()) + (rowId, error, data) + case _ => + throw new IllegalArgumentException("Not a CombinedFeatureId[GfwProFeatureId, GadmFeatureId]") + } + .toDF("id", "error", "data") + .select($"id.*", $"error.*", $"data.*") + } +} diff --git 
a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala new file mode 100644 index 00000000..e6f55d2f --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala @@ -0,0 +1,59 @@ +package org.globalforestwatch.summarystats.gfwpro_dashboard + +import cats.Semigroup +import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataDouble +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder + +/** Summary data per class + * + * Note: This case class contains mutable values + */ +case class GfwProDashboardData( + /** Location intersects GLAD Alert tiles, GLAD alerts are possible */ + glad_alerts_coverage: Boolean, + /** How many hacters of location geometry had tree cover extent > 30% in 2000 */ + tree_cover_extent_total: ForestChangeDiagnosticDataDouble, + /** GLAD alert count within location geometry grouped by day */ + glad_alerts_daily: GfwProDashboardDataDateCount, + /** GLAD alert count within location geometry grouped by ISO year-week */ + glad_alerts_weekly: GfwProDashboardDataDateCount, + /** GLAD alert count within location geometry grouped by year-month */ + glad_alerts_monthly: GfwProDashboardDataDateCount, + /** VIIRS alerts for location geometry grouped by day */ + viirs_alerts_daily: GfwProDashboardDataDateCount, +) { + + def merge(other: GfwProDashboardData): GfwProDashboardData = { + GfwProDashboardData( + glad_alerts_coverage || other.glad_alerts_coverage, + tree_cover_extent_total.merge(other.tree_cover_extent_total), + glad_alerts_daily.merge(other.glad_alerts_daily), + glad_alerts_weekly.merge(other.glad_alerts_weekly), + glad_alerts_monthly.merge(other.glad_alerts_monthly), + viirs_alerts_daily.merge(other.viirs_alerts_daily) + ) + } +} + +object GfwProDashboardData { + + def empty: GfwProDashboardData = + GfwProDashboardData( + glad_alerts_coverage = false, + tree_cover_extent_total = 
ForestChangeDiagnosticDataDouble.empty, + GfwProDashboardDataDateCount.empty, + GfwProDashboardDataDateCount.empty, + GfwProDashboardDataDateCount.empty, + GfwProDashboardDataDateCount.empty + ) + + implicit val gfwProDashboardDataSemigroup: Semigroup[GfwProDashboardData] = + new Semigroup[GfwProDashboardData] { + def combine(x: GfwProDashboardData, y: GfwProDashboardData): GfwProDashboardData = + x.merge(y) + } + + implicit def dataExpressionEncoder: ExpressionEncoder[GfwProDashboardData] = + frameless.TypedExpressionEncoder[GfwProDashboardData] + .asInstanceOf[ExpressionEncoder[GfwProDashboardData]] +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDateCount.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDateCount.scala new file mode 100644 index 00000000..d5478cb6 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDateCount.scala @@ -0,0 +1,73 @@ +package org.globalforestwatch.summarystats.gfwpro_dashboard + +import io.circe.syntax._ + +import scala.collection.immutable.SortedMap +import frameless.Injection +import cats.implicits._ +import java.time.LocalDate +import java.time.format._ +import java.time.temporal._ + +case class GfwProDashboardDataDateCount(value: SortedMap[String, Int]) { + + def merge(other: GfwProDashboardDataDateCount): GfwProDashboardDataDateCount = { + GfwProDashboardDataDateCount(value combine other.value) + } + + def toJson: String = this.value.asJson.noSpaces +} + +object GfwProDashboardDataDateCount { + + /** ex: 2016-1-1 => 2015-53 because the 1st of 2016 is Friday of the last week of 2015 */ + val WeekOfYear = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .appendValue(IsoFields.WEEK_BASED_YEAR, 4, 10, SignStyle.EXCEEDS_PAD) + .appendLiteral("-") + .appendValue(IsoFields.WEEK_OF_WEEK_BASED_YEAR, 2) + .toFormatter(java.util.Locale.US); + + val MonthOfYear = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + 
.appendValue(ChronoField.YEAR, 4, 10, SignStyle.EXCEEDS_PAD) + .appendLiteral("-") + .appendValue(ChronoField.MONTH_OF_YEAR, 2) + .toFormatter(java.util.Locale.US); + + implicit def injection: Injection[GfwProDashboardDataDateCount, String] = Injection(_.toJson, fromString) + + def empty: GfwProDashboardDataDateCount = GfwProDashboardDataDateCount(SortedMap()) + + def fillDaily(alertDate: Option[LocalDate], alertCount: Int): GfwProDashboardDataDateCount = + fill(alertDate, alertCount, _.format(DateTimeFormatter.ISO_DATE)) + + def fillWeekly(alertDate: Option[LocalDate], alertCount: Int): GfwProDashboardDataDateCount = + fill(alertDate, alertCount, _.format(WeekOfYear)) + + def fillMonthly(alertDate: Option[LocalDate], alertCount: Int): GfwProDashboardDataDateCount = + fill(alertDate, alertCount, _.format(MonthOfYear)) + + def fill( + alertDate: Option[LocalDate], + alertCount: Int, + formatter: LocalDate => String + ): GfwProDashboardDataDateCount = { + + alertDate match { + case Some(date) => + val dateKey: String = formatter(date) + GfwProDashboardDataDateCount(SortedMap(dateKey -> alertCount)) + + case _ => + this.empty + } + } + + def fromString(value: String): GfwProDashboardDataDateCount = { + val sortedMap = io.circe.parser.decode[SortedMap[String, Int]](value) + GfwProDashboardDataDateCount(sortedMap.getOrElse(SortedMap())) + } +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiExport.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiExport.scala new file mode 100644 index 00000000..178e5d35 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiExport.scala @@ -0,0 +1,36 @@ +package org.globalforestwatch.summarystats.gfwpro_dashboard + +import org.apache.spark.sql.{DataFrame, SaveMode} +import org.globalforestwatch.summarystats.SummaryExport +import org.globalforestwatch.util.Util.getAnyMapValue + +object GfwProDashboardExport extends SummaryExport { + + override val csvOptions: Map[String, String] 
= Map( + "header" -> "true", + "delimiter" -> "\t", + "quote" -> "\u0000", + "escape" -> "\u0000", + "quoteMode" -> "NONE", + "nullValue" -> null, + "emptyValue" -> null + ) + + override protected def exportGfwPro(summaryDF: DataFrame, + outputUrl: String, + kwargs: Map[String, Any]): Unit = { + val saveMode = + if (getAnyMapValue[Boolean](kwargs, "overwriteOutput")) + SaveMode.Overwrite + else + SaveMode.ErrorIfExists + + summaryDF + .repartition(1) + .write + .mode(saveMode) + .options(csvOptions) + .csv(path = outputUrl + "/final") + } + +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGrid.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGrid.scala new file mode 100644 index 00000000..e7db3c50 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGrid.scala @@ -0,0 +1,15 @@ +package org.globalforestwatch.summarystats.gfwpro_dashboard + +import geotrellis.vector.Extent +import org.globalforestwatch.grids.{GridTile, TenByTen30mGrid} + +object GfwProDashboardGrid + extends TenByTen30mGrid[GfwProDashboardGridSources] { + + val gridExtent: Extent = Extent(-180.0000, -90.0000, 180.0000, 90.0000) + + def getSources(gridTile: GridTile, + kwargs: Map[String, Any]): GfwProDashboardGridSources = + GfwProDashboardGridSources.getCachedSources(gridTile, kwargs) + +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala new file mode 100644 index 00000000..97528329 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala @@ -0,0 +1,46 @@ +package org.globalforestwatch.summarystats.gfwpro_dashboard + +import cats.implicits._ +import geotrellis.layer.{LayoutDefinition, SpatialKey} +import geotrellis.raster.Raster +import org.globalforestwatch.grids.{GridSources, GridTile} +import org.globalforestwatch.layers._ + +/** + * @param gridTile top left corner, padded from east 
ex: "10N_010E" + */ +case class GfwProDashboardGridSources(gridTile: GridTile, kwargs: Map[String, Any]) extends GridSources { + val gladAlerts = GladAlerts(gridTile, kwargs) + val treeCoverDensity2000 = TreeCoverDensityPercent2000(gridTile, kwargs) + + def readWindow( + windowKey: SpatialKey, + windowLayout: LayoutDefinition + ): Either[Throwable, Raster[GfwProDashboardTile]] = { + + for { + // Glad alerts are Optional Tiles, but we keep it this way to avoid signature changes + gladAlertsTile <- Either + .catchNonFatal(gladAlerts.fetchWindow(windowKey, windowLayout)) + .right + tcd2000Tile <- Either + .catchNonFatal(treeCoverDensity2000.fetchWindow(windowKey, windowLayout)) + .right + } yield { + val tile = GfwProDashboardTile(gladAlertsTile, tcd2000Tile) + Raster(tile, windowKey.extent(windowLayout)) + } + } +} + +object GfwProDashboardGridSources { + + @transient + private lazy val cache = + scala.collection.concurrent.TrieMap + .empty[String, GfwProDashboardGridSources] + + def getCachedSources(gridTile: GridTile, kwargs: Map[String, Any]): GfwProDashboardGridSources = { + cache.getOrElseUpdate(gridTile.tileId, GfwProDashboardGridSources(gridTile, kwargs)) + } +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRDD.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRDD.scala new file mode 100644 index 00000000..68d10ffc --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRDD.scala @@ -0,0 +1,49 @@ +package org.globalforestwatch.summarystats.gfwpro_dashboard + +import cats.implicits._ +import geotrellis.layer.{LayoutDefinition, SpatialKey} +import geotrellis.raster._ +import geotrellis.raster.rasterize.Rasterizer +import geotrellis.raster.summary.polygonal._ +import geotrellis.vector._ +import org.globalforestwatch.summarystats.ErrorSummaryRDD + +object GfwProDashboardRDD extends ErrorSummaryRDD { + + type SOURCES = GfwProDashboardGridSources + type SUMMARY = GfwProDashboardSummary + type TILE = 
GfwProDashboardTile + + def getSources(windowKey: SpatialKey, + windowLayout: LayoutDefinition, + kwargs: Map[String, Any]): Either[Throwable, SOURCES] = { + Either.catchNonFatal { + GfwProDashboardGrid.getRasterSource( + windowKey, + windowLayout, + kwargs + ) + } + } + + def readWindow( + rs: SOURCES, + windowKey: SpatialKey, + windowLayout: LayoutDefinition + ): Either[Throwable, Raster[TILE]] = + rs.readWindow(windowKey, windowLayout) + + def runPolygonalSummary( + raster: Raster[TILE], + geometry: Geometry, + options: Rasterizer.Options, + kwargs: Map[String, Any] + ): PolygonalSummaryResult[SUMMARY] = { + raster.polygonalSummary( + geometry, + GfwProDashboardSummary.getGridVisitor(kwargs), + options = options + ) + } + +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala new file mode 100644 index 00000000..f969d92f --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala @@ -0,0 +1,17 @@ +package org.globalforestwatch.summarystats.gfwpro_dashboard + +import cats.Semigroup + +/** Summary data per class + * + * Note: This case class contains mutable values + */ +case class GfwProDashboardRawData(var treeCoverExtentArea: Double, var alertCount: Int) { + def merge(other: GfwProDashboardRawData): GfwProDashboardRawData = { + GfwProDashboardRawData(treeCoverExtentArea + other.treeCoverExtentArea, alertCount + other.alertCount) + } +} + +object GfwProDashboardRawData { + implicit val lossDataSemigroup: Semigroup[GfwProDashboardRawData] = Semigroup.instance(_ merge _) +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala new file mode 100644 index 00000000..3274db4a --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala @@ -0,0 +1,19 @@ +package 
org.globalforestwatch.summarystats.gfwpro_dashboard + +import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataDouble +import java.time.LocalDate + +case class GfwProDashboardRawDataGroup( + alertDate: Option[LocalDate], + gladAlertsCoverage: Boolean +) { + def toGfwProDashboardData(alertCount: Int, totalArea: Double): GfwProDashboardData = { + GfwProDashboardData( + glad_alerts_coverage = gladAlertsCoverage, + glad_alerts_daily = GfwProDashboardDataDateCount.fillDaily(alertDate, alertCount), + glad_alerts_weekly = GfwProDashboardDataDateCount.fillWeekly(alertDate, alertCount), + glad_alerts_monthly = GfwProDashboardDataDateCount.fillMonthly(alertDate, alertCount), + viirs_alerts_daily = GfwProDashboardDataDateCount.empty, + tree_cover_extent_total = ForestChangeDiagnosticDataDouble.fill(totalArea)) + } +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala new file mode 100644 index 00000000..a41e9cbc --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala @@ -0,0 +1,61 @@ +package org.globalforestwatch.summarystats.gfwpro_dashboard + +import cats.implicits._ +import geotrellis.raster._ +import geotrellis.raster.Raster +import geotrellis.raster.summary.GridVisitor +import org.globalforestwatch.summarystats.Summary +import org.globalforestwatch.util.Geodesy +import java.time.LocalDate + +/** LossData Summary by year */ +case class GfwProDashboardSummary( + stats: Map[GfwProDashboardRawDataGroup, GfwProDashboardRawData] = Map.empty + ) extends Summary[GfwProDashboardSummary] { + + /** Combine two Maps and combine their LossData when a year is present in both */ + def merge(other: GfwProDashboardSummary): GfwProDashboardSummary = { + // the years.combine method uses LossData.lossDataSemigroup instance to perform per value combine on the map + GfwProDashboardSummary(stats.combine(other.stats)) 
+ } + def isEmpty = stats.isEmpty + + def toGfwProDashboardData(): GfwProDashboardData = { + stats + .map { case (group, data) => group.toGfwProDashboardData(data.alertCount, data.treeCoverExtentArea) } + .foldLeft(GfwProDashboardData.empty)( _ merge _) + } +} + +object GfwProDashboardSummary { + + def getGridVisitor(kwargs: Map[String, Any]): GridVisitor[Raster[GfwProDashboardTile], GfwProDashboardSummary] = + new GridVisitor[Raster[GfwProDashboardTile], GfwProDashboardSummary] { + private var acc: GfwProDashboardSummary = + new GfwProDashboardSummary() + + def result: GfwProDashboardSummary = acc + + def visit(raster: Raster[GfwProDashboardTile], col: Int, row: Int): Unit = { + val tcd2000: Integer = raster.tile.tcd2000.getData(col, row) + val gladAlertDate: Option[LocalDate] = raster.tile.gladAlerts.getData(col, row).map { case (date, _) => date } + val gladAlertCoverage = raster.tile.gladAlerts.t.isDefined + val isTreeCoverExtent30: Boolean = tcd2000 > 30 + + val groupKey = GfwProDashboardRawDataGroup(gladAlertDate, gladAlertsCoverage = gladAlertCoverage) + val summaryData = acc.stats.getOrElse(groupKey, GfwProDashboardRawData(treeCoverExtentArea = 0.0, alertCount = 0)) + + if (isTreeCoverExtent30) { + val areaHa = Geodesy.pixelArea(lat = raster.rasterExtent.gridRowToMap(row), raster.cellSize) / 10000.0 + summaryData.treeCoverExtentArea += areaHa + } + + if (gladAlertDate.isDefined) { + summaryData.alertCount += 1 + } + + val new_stats = acc.stats.updated(groupKey, summaryData) + acc = GfwProDashboardSummary(new_stats) + } + } +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala new file mode 100644 index 00000000..045b86c2 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala @@ -0,0 +1,21 @@ +package org.globalforestwatch.summarystats.gfwpro_dashboard + +import geotrellis.raster.{CellGrid, CellType, IntCellType} +import 
org.globalforestwatch.layers._ + +/** + * + * Tile-like structure to hold tiles from datasets required for our summary. + * We can not use GeoTrellis MultibandTile because it requires all bands share a CellType. + */ +case class GfwProDashboardTile( + gladAlerts: GladAlerts#OptionalITile, + tcd2000: TreeCoverDensityPercent2000#ITile +) extends CellGrid[Int] { + + def cellType: CellType = gladAlerts.cellType.getOrElse(IntCellType) + + def cols: Int = gladAlerts.cols.getOrElse(GfwProDashboardGrid.blockSize) + + def rows: Int = gladAlerts.rows.getOrElse(GfwProDashboardGrid.blockSize) +} From a92c718aa4f396a5aa74e6a479890015a25fbd40 Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Fri, 23 Jun 2023 11:01:18 -0700 Subject: [PATCH 02/33] AFi initial code --- .../summarystats/afi/AFiAnalysis.scala | 22 ++++---- .../summarystats/afi/AFiCommand.scala | 41 ++------------ .../summarystats/afi/AFiDF.scala | 12 ++-- .../summarystats/afi/AFiData.scala | 42 +++++++------- .../summarystats/afi/AFiDataDateCount.scala | 28 +++++----- .../summarystats/afi/AFiExport.scala | 4 +- .../summarystats/afi/AFiGrid.scala | 10 ++-- .../summarystats/afi/AFiGridSources.scala | 27 ++++----- .../summarystats/afi/AFiRDD.scala | 14 ++--- .../summarystats/afi/AFiRawData.scala | 12 ++-- .../summarystats/afi/AFiRawDataGroup.scala | 19 +++---- .../summarystats/afi/AFiSummary.scala | 55 +++++++++---------- .../summarystats/afi/AFiTile.scala | 13 ++--- 13 files changed, 128 insertions(+), 171 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index e5b268f8..daf0e5ae 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -1,4 +1,4 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import cats.data.{NonEmptyList, 
Validated} import geotrellis.vector.{Feature, Geometry} @@ -24,9 +24,9 @@ import org.globalforestwatch.util.IntersectGeometry import scala.reflect.ClassTag -object GfwProDashboardAnalysis extends SummaryAnalysis { +object AFiAnalysis extends SummaryAnalysis { - val name = "gfwpro_dashboard" + val name = "aafi" def apply( featureRDD: RDD[ValidatedLocation[Geometry]], @@ -69,22 +69,22 @@ object GfwProDashboardAnalysis extends SummaryAnalysis { .flatMap { enrichedRDD => val fireStatsRDD = fireStats(enrichedRDD, fireAlertRDD, spark) val tmp = enrichedRDD.map { case Location(id, geom) => Feature(geom, id) } - val validatedSummaryStatsRdd = GfwProDashboardRDD(tmp, GfwProDashboardGrid.blockTileGrid, kwargs) + val validatedSummaryStatsRdd = AFiRDD(tmp, AFiGrid.blockTileGrid, kwargs) ValidatedWorkflow(validatedSummaryStatsRdd).mapValid { summaryStatsRDD => // fold in fireStatsRDD after polygonal summary and accumulate the errors summaryStatsRDD - .mapValues(_.toGfwProDashboardData()) + .mapValues(_.toAFiData()) .leftOuterJoin(fireStatsRDD) .mapValues { case (data, fire) => - data.copy(viirs_alerts_daily = fire.getOrElse(GfwProDashboardDataDateCount.empty)) + data.copy(viirs_alerts_daily = fire.getOrElse(AFiDataDateCount.empty)) } } } } - val summaryDF = GfwProDashboardDF.getFeatureDataFrameFromVerifiedRdd(summaryRDD.unify, spark) + val summaryDF = AFiDF.getFeatureDataFrameFromVerifiedRdd(summaryRDD.unify, spark) val runOutputUrl: String = getOutputUrl(kwargs) - GfwProDashboardExport.export(featureType, summaryDF, runOutputUrl, kwargs) + AFiExport.export(featureType, summaryDF, runOutputUrl, kwargs) } @@ -150,18 +150,18 @@ object GfwProDashboardAnalysis extends SummaryAnalysis { featureRDD: RDD[Location[Geometry]], fireAlertRDD: SpatialRDD[Geometry], spark: SparkSession - ): RDD[Location[GfwProDashboardDataDateCount]] = { + ): RDD[Location[AFiDataDateCount]] = { val featureSpatialRDD = RDDAdapter.toSpatialRDDfromLocationRdd(featureRDD, spark) val joinedRDD = 
SpatialJoinRDD.spatialjoin(featureSpatialRDD, fireAlertRDD) joinedRDD.rdd .map { case (poly, points) => val fid = poly.getUserData.asInstanceOf[FeatureId] - val data = points.asScala.foldLeft(GfwProDashboardDataDateCount.empty) { (z, point) => + val data = points.asScala.foldLeft(AFiDataDateCount.empty) { (z, point) => // extract year from acq_date column is YYYY-MM-DD val acqDate = point.getUserData.asInstanceOf[String].split("\t")(2) val alertDate = LocalDate.parse(acqDate) - z.merge(GfwProDashboardDataDateCount.fillDaily(Some(alertDate), 1)) + z.merge(AFiDataDateCount.fillDaily(Some(alertDate), 1)) } (fid, data) } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala index ee21f30f..a636d0f0 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala @@ -1,4 +1,4 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import cats.data.NonEmptyList import org.globalforestwatch.summarystats.SummaryCommand @@ -10,59 +10,30 @@ import org.globalforestwatch.config.GfwConfig import org.globalforestwatch.features._ import org.locationtech.jts.geom.Geometry -object GfwProDashboardCommand extends SummaryCommand { +object AFiCommand extends SummaryCommand { - val contextualFeatureUrlOpt: Opts[NonEmptyList[String]] = Opts - .options[String]( - "contextual_feature_url", - help = "URI of contextual features in TSV format" - ) - - val contextualFeatureTypeOpt: Opts[String] = Opts - .option[String]( - "contextual_feature_type", - help = "Type of contextual features" - ) - - val gfwProDashboardCommand: Opts[Unit] = Opts.subcommand( - name = GfwProDashboardAnalysis.name, + val afiCommand: Opts[Unit] = Opts.subcommand( + name = AFiAnalysis.name, help = "Compute summary statistics for GFW Pro Dashboard." 
) { ( defaultOptions, - optionalFireAlertOptions, featureFilterOptions, - contextualFeatureUrlOpt, - contextualFeatureTypeOpt - ).mapN { (default, fireAlert, filterOptions, contextualFeatureUrl, contextualFeatureType) => + ).mapN { (default, filterOptions) => val kwargs = Map( "outputUrl" -> default.outputUrl, "noOutputPathSuffix" -> default.noOutputPathSuffix, "overwriteOutput" -> default.overwriteOutput, "config" -> GfwConfig.get ) - // TODO: move building the feature object into options val featureFilter = FeatureFilter.fromOptions(default.featureType, filterOptions) runAnalysis { implicit spark => val featureRDD = ValidatedFeatureRDD(default.featureUris, default.featureType, featureFilter, default.splitFeatures) - val fireAlertRDD = fireAlert.alertSource match { - case Some(alertSource) => - FireAlertRDD(spark, fireAlert.alertType, alertSource, FeatureFilter.empty) - case None => - // If no sources provided, just create an empty RDD - val spatialRDD = new SpatialRDD[Geometry] - spatialRDD.rawSpatialRDD = spark.sparkContext.emptyRDD[Geometry].toJavaRDD() - spatialRDD - } - - GfwProDashboardAnalysis( + AFiAnalysis( featureRDD, default.featureType, - contextualFeatureType = contextualFeatureType, - contextualFeatureUrl = contextualFeatureUrl, - fireAlertRDD, spark, kwargs ) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala index 3520fafa..79076f64 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala @@ -1,4 +1,4 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, SparkSession} @@ -6,11 +6,11 @@ import org.globalforestwatch.features.{CombinedFeatureId, FeatureId, GadmFeature import org.globalforestwatch.summarystats._ import 
cats.data.Validated.{Valid, Invalid} -object GfwProDashboardDF extends SummaryDF { +object AFiDF extends SummaryDF { case class RowGadmId(list_id: String, location_id: String, gadm_id: String) def getFeatureDataFrameFromVerifiedRdd( - dataRDD: RDD[ValidatedLocation[GfwProDashboardData]], + dataRDD: RDD[ValidatedLocation[AFiData]], spark: SparkSession ): DataFrame = { import spark.implicits._ @@ -27,14 +27,14 @@ object GfwProDashboardDF extends SummaryDF { case Valid(Location(id, data)) => (rowId(id), SummaryDF.RowError.empty, data) case Invalid(Location(id, err)) => - (rowId(id), SummaryDF.RowError.fromJobError(err), GfwProDashboardData.empty) + (rowId(id), SummaryDF.RowError.fromJobError(err), AFiData.empty) } .toDF("id", "error", "data") .select($"id.*", $"error.*", $"data.*") } def getFeatureDataFrame( - dataRDD: RDD[(FeatureId, ValidatedRow[GfwProDashboardData])], + dataRDD: RDD[(FeatureId, ValidatedRow[AFiData])], spark: SparkSession ): DataFrame = { import spark.implicits._ @@ -43,7 +43,7 @@ object GfwProDashboardDF extends SummaryDF { case Valid(data) => (SummaryDF.RowError.empty, data) case Invalid(err) => - (SummaryDF.RowError.fromJobError(err), GfwProDashboardData.empty) + (SummaryDF.RowError.fromJobError(err), AFiData.empty) }.map { case (CombinedFeatureId(proId: GfwProFeatureId, gadmId: GadmFeatureId), (error, data)) => val rowId = RowGadmId(proId.listId, proId.locationId.toString, gadmId.toString()) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala index e6f55d2f..8fde5d38 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala @@ -1,4 +1,4 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import cats.Semigroup import 
org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataDouble @@ -8,23 +8,23 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder * * Note: This case class contains mutable values */ -case class GfwProDashboardData( +case class AFiData( /** Location intersects GLAD Alert tiles, GLAD alerts are possible */ glad_alerts_coverage: Boolean, /** How many hacters of location geometry had tree cover extent > 30% in 2000 */ tree_cover_extent_total: ForestChangeDiagnosticDataDouble, /** GLAD alert count within location geometry grouped by day */ - glad_alerts_daily: GfwProDashboardDataDateCount, + glad_alerts_daily: AFiDataDateCount, /** GLAD alert count within location geometry grouped by ISO year-week */ - glad_alerts_weekly: GfwProDashboardDataDateCount, + glad_alerts_weekly: AFiDataDateCount, /** GLAD alert count within location geometry grouped by year-month */ - glad_alerts_monthly: GfwProDashboardDataDateCount, + glad_alerts_monthly: AFiDataDateCount, /** VIIRS alerts for location geometry grouped by day */ - viirs_alerts_daily: GfwProDashboardDataDateCount, + viirs_alerts_daily: AFiDataDateCount, ) { - def merge(other: GfwProDashboardData): GfwProDashboardData = { - GfwProDashboardData( + def merge(other: AFiData): AFiData = { + AFiData( glad_alerts_coverage || other.glad_alerts_coverage, tree_cover_extent_total.merge(other.tree_cover_extent_total), glad_alerts_daily.merge(other.glad_alerts_daily), @@ -35,25 +35,25 @@ case class GfwProDashboardData( } } -object GfwProDashboardData { +object AFiData { - def empty: GfwProDashboardData = - GfwProDashboardData( + def empty: AFiData = + AFiData( glad_alerts_coverage = false, tree_cover_extent_total = ForestChangeDiagnosticDataDouble.empty, - GfwProDashboardDataDateCount.empty, - GfwProDashboardDataDateCount.empty, - GfwProDashboardDataDateCount.empty, - GfwProDashboardDataDateCount.empty + AFiDataDateCount.empty, + AFiDataDateCount.empty, + AFiDataDateCount.empty, + 
AFiDataDateCount.empty ) - implicit val gfwProDashboardDataSemigroup: Semigroup[GfwProDashboardData] = - new Semigroup[GfwProDashboardData] { - def combine(x: GfwProDashboardData, y: GfwProDashboardData): GfwProDashboardData = + implicit val gfwProDashboardDataSemigroup: Semigroup[AFiData] = + new Semigroup[AFiData] { + def combine(x: AFiData, y: AFiData): AFiData = x.merge(y) } - implicit def dataExpressionEncoder: ExpressionEncoder[GfwProDashboardData] = - frameless.TypedExpressionEncoder[GfwProDashboardData] - .asInstanceOf[ExpressionEncoder[GfwProDashboardData]] + implicit def dataExpressionEncoder: ExpressionEncoder[AFiData] = + frameless.TypedExpressionEncoder[AFiData] + .asInstanceOf[ExpressionEncoder[AFiData]] } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDateCount.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDateCount.scala index d5478cb6..37480afc 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDateCount.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDateCount.scala @@ -1,4 +1,4 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import io.circe.syntax._ @@ -9,16 +9,16 @@ import java.time.LocalDate import java.time.format._ import java.time.temporal._ -case class GfwProDashboardDataDateCount(value: SortedMap[String, Int]) { +case class AFiDataDateCount(value: SortedMap[String, Int]) { - def merge(other: GfwProDashboardDataDateCount): GfwProDashboardDataDateCount = { - GfwProDashboardDataDateCount(value combine other.value) + def merge(other: AFiDataDateCount): AFiDataDateCount = { + AFiDataDateCount(value combine other.value) } def toJson: String = this.value.asJson.noSpaces } -object GfwProDashboardDataDateCount { +object AFiDataDateCount { /** ex: 2016-1-1 => 2015-53 because the 1st of 2016 is Friday of the last week of 2015 */ val WeekOfYear = @@ -37,37 +37,37 @@ object 
GfwProDashboardDataDateCount { .appendValue(ChronoField.MONTH_OF_YEAR, 2) .toFormatter(java.util.Locale.US); - implicit def injection: Injection[GfwProDashboardDataDateCount, String] = Injection(_.toJson, fromString) + implicit def injection: Injection[AFiDataDateCount, String] = Injection(_.toJson, fromString) - def empty: GfwProDashboardDataDateCount = GfwProDashboardDataDateCount(SortedMap()) + def empty: AFiDataDateCount = AFiDataDateCount(SortedMap()) - def fillDaily(alertDate: Option[LocalDate], alertCount: Int): GfwProDashboardDataDateCount = + def fillDaily(alertDate: Option[LocalDate], alertCount: Int): AFiDataDateCount = fill(alertDate, alertCount, _.format(DateTimeFormatter.ISO_DATE)) - def fillWeekly(alertDate: Option[LocalDate], alertCount: Int): GfwProDashboardDataDateCount = + def fillWeekly(alertDate: Option[LocalDate], alertCount: Int): AFiDataDateCount = fill(alertDate, alertCount, _.format(WeekOfYear)) - def fillMonthly(alertDate: Option[LocalDate], alertCount: Int): GfwProDashboardDataDateCount = + def fillMonthly(alertDate: Option[LocalDate], alertCount: Int): AFiDataDateCount = fill(alertDate, alertCount, _.format(MonthOfYear)) def fill( alertDate: Option[LocalDate], alertCount: Int, formatter: LocalDate => String - ): GfwProDashboardDataDateCount = { + ): AFiDataDateCount = { alertDate match { case Some(date) => val dateKey: String = formatter(date) - GfwProDashboardDataDateCount(SortedMap(dateKey -> alertCount)) + AFiDataDateCount(SortedMap(dateKey -> alertCount)) case _ => this.empty } } - def fromString(value: String): GfwProDashboardDataDateCount = { + def fromString(value: String): AFiDataDateCount = { val sortedMap = io.circe.parser.decode[SortedMap[String, Int]](value) - GfwProDashboardDataDateCount(sortedMap.getOrElse(SortedMap())) + AFiDataDateCount(sortedMap.getOrElse(SortedMap())) } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiExport.scala 
b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiExport.scala index 178e5d35..e0a0bc4e 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiExport.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiExport.scala @@ -1,10 +1,10 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import org.apache.spark.sql.{DataFrame, SaveMode} import org.globalforestwatch.summarystats.SummaryExport import org.globalforestwatch.util.Util.getAnyMapValue -object GfwProDashboardExport extends SummaryExport { +object AFiExport extends SummaryExport { override val csvOptions: Map[String, String] = Map( "header" -> "true", diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGrid.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGrid.scala index e7db3c50..564f0f08 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGrid.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGrid.scala @@ -1,15 +1,15 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import geotrellis.vector.Extent import org.globalforestwatch.grids.{GridTile, TenByTen30mGrid} -object GfwProDashboardGrid - extends TenByTen30mGrid[GfwProDashboardGridSources] { +object AFiGrid + extends TenByTen30mGrid[AFiGridSources] { val gridExtent: Extent = Extent(-180.0000, -90.0000, 180.0000, 90.0000) def getSources(gridTile: GridTile, - kwargs: Map[String, Any]): GfwProDashboardGridSources = - GfwProDashboardGridSources.getCachedSources(gridTile, kwargs) + kwargs: Map[String, Any]): AFiGridSources = + AFiGridSources.getCachedSources(gridTile, kwargs) } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala index 97528329..d4fc99c9 100644 --- 
a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala @@ -1,4 +1,4 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import cats.implicits._ import geotrellis.layer.{LayoutDefinition, SpatialKey} @@ -9,38 +9,31 @@ import org.globalforestwatch.layers._ /** * @param gridTile top left corner, padded from east ex: "10N_010E" */ -case class GfwProDashboardGridSources(gridTile: GridTile, kwargs: Map[String, Any]) extends GridSources { - val gladAlerts = GladAlerts(gridTile, kwargs) - val treeCoverDensity2000 = TreeCoverDensityPercent2000(gridTile, kwargs) +case class AFiGridSources(gridTile: GridTile, kwargs: Map[String, Any]) extends GridSources { + val treeCoverLoss: TreeCoverLoss = TreeCoverLoss(gridTile, kwargs) def readWindow( windowKey: SpatialKey, windowLayout: LayoutDefinition - ): Either[Throwable, Raster[GfwProDashboardTile]] = { + ): Either[Throwable, Raster[AFiTile]] = { for { - // Glad alerts are Optional Tiles, but we keep it this way to avoid signature changes - gladAlertsTile <- Either - .catchNonFatal(gladAlerts.fetchWindow(windowKey, windowLayout)) - .right - tcd2000Tile <- Either - .catchNonFatal(treeCoverDensity2000.fetchWindow(windowKey, windowLayout)) - .right + lossTile <- Either.catchNonFatal(treeCoverLoss.fetchWindow(windowKey, windowLayout)).right } yield { - val tile = GfwProDashboardTile(gladAlertsTile, tcd2000Tile) + val tile = AFiTile(lossTile) Raster(tile, windowKey.extent(windowLayout)) } } } -object GfwProDashboardGridSources { +object AFiGridSources { @transient private lazy val cache = scala.collection.concurrent.TrieMap - .empty[String, GfwProDashboardGridSources] + .empty[String, AFiGridSources] - def getCachedSources(gridTile: GridTile, kwargs: Map[String, Any]): GfwProDashboardGridSources = { - cache.getOrElseUpdate(gridTile.tileId, GfwProDashboardGridSources(gridTile, 
kwargs)) + def getCachedSources(gridTile: GridTile, kwargs: Map[String, Any]): AFiGridSources = { + cache.getOrElseUpdate(gridTile.tileId, AFiGridSources(gridTile, kwargs)) } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRDD.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRDD.scala index 68d10ffc..d74ec209 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRDD.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRDD.scala @@ -1,4 +1,4 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import cats.implicits._ import geotrellis.layer.{LayoutDefinition, SpatialKey} @@ -8,17 +8,17 @@ import geotrellis.raster.summary.polygonal._ import geotrellis.vector._ import org.globalforestwatch.summarystats.ErrorSummaryRDD -object GfwProDashboardRDD extends ErrorSummaryRDD { +object AFiRDD extends ErrorSummaryRDD { - type SOURCES = GfwProDashboardGridSources - type SUMMARY = GfwProDashboardSummary - type TILE = GfwProDashboardTile + type SOURCES = AFiGridSources + type SUMMARY = AFiSummary + type TILE = AFiTile def getSources(windowKey: SpatialKey, windowLayout: LayoutDefinition, kwargs: Map[String, Any]): Either[Throwable, SOURCES] = { Either.catchNonFatal { - GfwProDashboardGrid.getRasterSource( + AFiGrid.getRasterSource( windowKey, windowLayout, kwargs @@ -41,7 +41,7 @@ object GfwProDashboardRDD extends ErrorSummaryRDD { ): PolygonalSummaryResult[SUMMARY] = { raster.polygonalSummary( geometry, - GfwProDashboardSummary.getGridVisitor(kwargs), + AFiSummary.getGridVisitor(kwargs), options = options ) } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala index f969d92f..7d5a0fce 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala @@ 
-1,4 +1,4 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import cats.Semigroup @@ -6,12 +6,12 @@ import cats.Semigroup * * Note: This case class contains mutable values */ -case class GfwProDashboardRawData(var treeCoverExtentArea: Double, var alertCount: Int) { - def merge(other: GfwProDashboardRawData): GfwProDashboardRawData = { - GfwProDashboardRawData(treeCoverExtentArea + other.treeCoverExtentArea, alertCount + other.alertCount) +case class AFiRawData(var lossArea: Double) { + def merge(other: AFiRawData): AFiRawData = { + AFiRawData(lossArea + other.lossArea) } } -object GfwProDashboardRawData { - implicit val lossDataSemigroup: Semigroup[GfwProDashboardRawData] = Semigroup.instance(_ merge _) +object AFiRawData { + implicit val lossDataSemigroup: Semigroup[AFiRawData] = Semigroup.instance(_ merge _) } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala index 3274db4a..f11805ff 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala @@ -1,19 +1,18 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataDouble import java.time.LocalDate -case class GfwProDashboardRawDataGroup( - alertDate: Option[LocalDate], - gladAlertsCoverage: Boolean +case class AFiRawDataGroup( + treeCoverLoss: Integer ) { - def toGfwProDashboardData(alertCount: Int, totalArea: Double): GfwProDashboardData = { - GfwProDashboardData( + def toAFiData(alertCount: Int, totalArea: Double): AFiData = { + AFiData( glad_alerts_coverage = gladAlertsCoverage, - glad_alerts_daily = GfwProDashboardDataDateCount.fillDaily(alertDate, alertCount), - 
glad_alerts_weekly = GfwProDashboardDataDateCount.fillWeekly(alertDate, alertCount), - glad_alerts_monthly = GfwProDashboardDataDateCount.fillMonthly(alertDate, alertCount), - viirs_alerts_daily = GfwProDashboardDataDateCount.empty, + glad_alerts_daily = AFiDataDateCount.fillDaily(alertDate, alertCount), + glad_alerts_weekly = AFiDataDateCount.fillWeekly(alertDate, alertCount), + glad_alerts_monthly = AFiDataDateCount.fillMonthly(alertDate, alertCount), + viirs_alerts_daily = AFiDataDateCount.empty, tree_cover_extent_total = ForestChangeDiagnosticDataDouble.fill(totalArea)) } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala index a41e9cbc..281f38d7 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala @@ -1,4 +1,4 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import cats.implicits._ import geotrellis.raster._ @@ -9,53 +9,48 @@ import org.globalforestwatch.util.Geodesy import java.time.LocalDate /** LossData Summary by year */ -case class GfwProDashboardSummary( - stats: Map[GfwProDashboardRawDataGroup, GfwProDashboardRawData] = Map.empty - ) extends Summary[GfwProDashboardSummary] { +case class AFiSummary( + stats: Map[AFiRawDataGroup, AFiRawData] = Map.empty + ) extends Summary[AFiSummary] { /** Combine two Maps and combine their LossData when a year is present in both */ - def merge(other: GfwProDashboardSummary): GfwProDashboardSummary = { + def merge(other: AFiSummary): AFiSummary = { // the years.combine method uses LossData.lossDataSemigroup instance to perform per value combine on the map - GfwProDashboardSummary(stats.combine(other.stats)) + AFiSummary(stats.combine(other.stats)) } def isEmpty = stats.isEmpty - def toGfwProDashboardData(): GfwProDashboardData = { + def 
toAFiData(): AFiData = { stats - .map { case (group, data) => group.toGfwProDashboardData(data.alertCount, data.treeCoverExtentArea) } - .foldLeft(GfwProDashboardData.empty)( _ merge _) + .map { case (group, data) => group.toAFiData(data.alertCount, data.treeCoverExtentArea) } + .foldLeft(AFiData.empty)( _ merge _) } } -object GfwProDashboardSummary { +object AFiSummary { - def getGridVisitor(kwargs: Map[String, Any]): GridVisitor[Raster[GfwProDashboardTile], GfwProDashboardSummary] = - new GridVisitor[Raster[GfwProDashboardTile], GfwProDashboardSummary] { - private var acc: GfwProDashboardSummary = - new GfwProDashboardSummary() + def getGridVisitor(kwargs: Map[String, Any]): GridVisitor[Raster[AFiTile], AFiSummary] = + new GridVisitor[Raster[AFiTile], AFiSummary] { + private var acc: AFiSummary = + new AFiSummary() - def result: GfwProDashboardSummary = acc + def result: AFiSummary = acc - def visit(raster: Raster[GfwProDashboardTile], col: Int, row: Int): Unit = { - val tcd2000: Integer = raster.tile.tcd2000.getData(col, row) - val gladAlertDate: Option[LocalDate] = raster.tile.gladAlerts.getData(col, row).map { case (date, _) => date } - val gladAlertCoverage = raster.tile.gladAlerts.t.isDefined - val isTreeCoverExtent30: Boolean = tcd2000 > 30 + def visit(raster: Raster[AFiTile], col: Int, row: Int): Unit = { + val lossYear: Integer = raster.tile.treeCoverLoss.getData(col, row) - val groupKey = GfwProDashboardRawDataGroup(gladAlertDate, gladAlertsCoverage = gladAlertCoverage) - val summaryData = acc.stats.getOrElse(groupKey, GfwProDashboardRawData(treeCoverExtentArea = 0.0, alertCount = 0)) + val iso = ... + val adm1 = ... + val adm2: Integer = ... - if (isTreeCoverExtent30) { - val areaHa = Geodesy.pixelArea(lat = raster.rasterExtent.gridRowToMap(row), raster.cellSize) / 10000.0 - summaryData.treeCoverExtentArea += areaHa - } + val groupKey = AFiRawDataGroup(iso, adm1, adm2, lossYear) + val area = ... 
- if (gladAlertDate.isDefined) { - summaryData.alertCount += 1 - } + val summaryData = acc.stats.getOrElse(groupKey, AFiRawData(lossArea = 0)) + summaryData.lossArea += area val new_stats = acc.stats.updated(groupKey, summaryData) - acc = GfwProDashboardSummary(new_stats) + acc = AFiSummary(new_stats) } } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala index 045b86c2..285add67 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala @@ -1,4 +1,4 @@ -package org.globalforestwatch.summarystats.gfwpro_dashboard +package org.globalforestwatch.summarystats.afi import geotrellis.raster.{CellGrid, CellType, IntCellType} import org.globalforestwatch.layers._ @@ -8,14 +8,13 @@ import org.globalforestwatch.layers._ * Tile-like structure to hold tiles from datasets required for our summary. * We can not use GeoTrellis MultibandTile because it requires all bands share a CellType. 
*/ -case class GfwProDashboardTile( - gladAlerts: GladAlerts#OptionalITile, - tcd2000: TreeCoverDensityPercent2000#ITile +case class AFiTile( + treeCoverLoss: TreeCoverLoss#ITile, ) extends CellGrid[Int] { - def cellType: CellType = gladAlerts.cellType.getOrElse(IntCellType) + def cellType: CellType = treeCoverLoss.cellType - def cols: Int = gladAlerts.cols.getOrElse(GfwProDashboardGrid.blockSize) + def cols: Int = treeCoverLoss.cols - def rows: Int = gladAlerts.rows.getOrElse(GfwProDashboardGrid.blockSize) + def rows: Int = treeCoverLoss.rows } From c29d705ef5e3ad7288373905dd5174138ce974b2 Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Wed, 28 Jun 2023 15:39:06 -0700 Subject: [PATCH 03/33] Set up analysis --- .../summarystats/afi/AFiAnalysis.scala | 144 ++---------------- .../summarystats/afi/AFiDF.scala | 29 +--- .../summarystats/afi/AFiGridSources.scala | 7 +- 3 files changed, 15 insertions(+), 165 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index daf0e5ae..9dbfbed4 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -1,5 +1,6 @@ package org.globalforestwatch.summarystats.afi +import cats.data.Validated.{Invalid, Valid} import cats.data.{NonEmptyList, Validated} import geotrellis.vector.{Feature, Geometry} import geotrellis.store.index.zcurve.Z2 @@ -13,16 +14,7 @@ import org.globalforestwatch.ValidatedWorkflow import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession import org.apache.spark.storage.StorageLevel -import org.globalforestwatch.features.FeatureId -import org.apache.sedona.sql.utils.Adapter -import org.apache.sedona.core.spatialRDD.SpatialRDD -import org.globalforestwatch.util.GeotrellisGeometryValidator.makeValidGeom -import scala.collection.JavaConverters._ -import java.time.LocalDate -import 
org.globalforestwatch.util.IntersectGeometry - -import scala.reflect.ClassTag object AFiAnalysis extends SummaryAnalysis { @@ -31,140 +23,22 @@ object AFiAnalysis extends SummaryAnalysis { def apply( featureRDD: RDD[ValidatedLocation[Geometry]], featureType: String, - contextualFeatureType: String, - contextualFeatureUrl: NonEmptyList[String], - fireAlertRDD: SpatialRDD[Geometry], spark: SparkSession, kwargs: Map[String, Any] ): Unit = { featureRDD.persist(StorageLevel.MEMORY_AND_DISK) - val summaryRDD = ValidatedWorkflow(featureRDD).flatMap { rdd => - val spatialContextualDF = SpatialFeatureDF(contextualFeatureUrl, contextualFeatureType, FeatureFilter.empty, "geom", spark) - val spatialContextualRDD = Adapter.toSpatialRdd(spatialContextualDF, "polyshape") - val spatialFeatureRDD = RDDAdapter.toSpatialRDDfromLocationRdd(rdd, spark) - - /* Enrich the feature RDD by intersecting it with contextual features - * The resulting FeatuerId carries combined identity of source fature and contextual geometry - */ - val enrichedRDD = - SpatialJoinRDD - .flatSpatialJoin(spatialContextualRDD, spatialFeatureRDD, considerBoundaryIntersection = true, usingIndex = true) - .rdd - .flatMap { case (feature, context) => - refineContextualIntersection(feature, context, contextualFeatureType) - } - - ValidatedWorkflow(enrichedRDD) - .mapValidToValidated { rdd => - rdd.map { case row@Location(fid, geom) => - if (geom.isEmpty()) { - Validated.invalid[Location[JobError], Location[Geometry]](Location(fid, GeometryError(s"Empty Geometry"))) - } else if (!geom.isValid) { - Validated.invalid[Location[JobError], Location[Geometry]](Location(fid, GeometryError(s"Invalid Geometry"))) - } else - Validated.valid[Location[JobError], Location[Geometry]](row) - } - } - .flatMap { enrichedRDD => - val fireStatsRDD = fireStats(enrichedRDD, fireAlertRDD, spark) - val tmp = enrichedRDD.map { case Location(id, geom) => Feature(geom, id) } - val validatedSummaryStatsRdd = AFiRDD(tmp, AFiGrid.blockTileGrid, 
kwargs) - ValidatedWorkflow(validatedSummaryStatsRdd).mapValid { summaryStatsRDD => - // fold in fireStatsRDD after polygonal summary and accumulate the errors - summaryStatsRDD - .mapValues(_.toAFiData()) - .leftOuterJoin(fireStatsRDD) - .mapValues { case (data, fire) => - data.copy(viirs_alerts_daily = fire.getOrElse(AFiDataDateCount.empty)) - } - } - } + // TODO invalid should map to job error somehow, probably using ValidatedWorkflow + val validatedRDD = featureRDD.map{ + case Validated.Valid(Location(id, geom: Geometry)) => Feature(geom, id) + case Validated.Invalid(Location(id, geom: Geometry)) => Feature(geom, id) } - val summaryDF = AFiDF.getFeatureDataFrameFromVerifiedRdd(summaryRDD.unify, spark) + val summaryRDD = AFiRDD(validatedRDD, AFiGrid.blockTileGrid, kwargs) + + // TODO somehow convert AFiSummary to AFiData + val summaryDF = AFiDF.getFeatureDataFrame(summaryRDD, spark) val runOutputUrl: String = getOutputUrl(kwargs) AFiExport.export(featureType, summaryDF, runOutputUrl, kwargs) - - } - - /** These geometries touch, apply application specific logic of how to treat that. 
- * - For intersection of location geometries only keep those where centroid of location is in the contextual geom (this ensures that - * any location is only assigned to a single contextual area even if it intersects more) - * - For dissolved geometry of list report all contextual areas it intersects - */ - private def refineContextualIntersection( - featureGeom: Geometry, - contextualGeom: Geometry, - contextualFeatureType: String - ): List[ValidatedLocation[Geometry]] = { - val featureId = featureGeom.getUserData.asInstanceOf[FeatureId] - val contextualId = FeatureId.fromUserData(contextualFeatureType, contextualGeom.getUserData.asInstanceOf[String], delimiter = ",") - - featureId match { - case gfwproId: GfwProFeatureId if gfwproId.locationId >= 0 => - val featureCentroid = createPoint(featureGeom.getCentroid.getX, featureGeom.getCentroid.getY) - if (contextualGeom.contains(featureCentroid)) { - val fid = CombinedFeatureId(gfwproId, contextualId) - // val gtGeom: Geometry = toGeotrellisGeometry(featureGeom) - val fixedGeom = makeValidGeom(featureGeom) - List(Validated.Valid(Location(fid, fixedGeom))) - } else Nil - - case gfwproId: GfwProFeatureId if gfwproId.locationId < 0 => - IntersectGeometry - .validatedIntersection(featureGeom, contextualGeom) - .leftMap { err => Location(featureId, err) } - .map { geometries => - geometries.map { geom => - // val gtGeom: Geometry = toGeotrellisGeometry(geom) - val fixedGeom = makeValidGeom(geom) - Location(CombinedFeatureId(gfwproId, contextualId), fixedGeom) - } - } - .traverse(identity) // turn validated list of geometries into list of validated geometries - } - } - - private def partitionByZIndex[A: ClassTag](rdd: RDD[A])(getGeom: A => Geometry): RDD[A] = { - val hashPartitioner = new HashPartitioner(rdd.getNumPartitions) - - rdd - .keyBy({ row => - val geom = getGeom(row) - Z2( - (geom.getCentroid.getX * 100).toInt, - (geom.getCentroid.getY * 100).toInt - ).z - }) - .partitionBy(hashPartitioner) - .mapPartitions( - { 
iter: Iterator[(Long, A)] => - for (i <- iter) yield i._2 - }, - preservesPartitioning = true - ) - } - - private def fireStats( - featureRDD: RDD[Location[Geometry]], - fireAlertRDD: SpatialRDD[Geometry], - spark: SparkSession - ): RDD[Location[AFiDataDateCount]] = { - val featureSpatialRDD = RDDAdapter.toSpatialRDDfromLocationRdd(featureRDD, spark) - val joinedRDD = SpatialJoinRDD.spatialjoin(featureSpatialRDD, fireAlertRDD) - - joinedRDD.rdd - .map { case (poly, points) => - val fid = poly.getUserData.asInstanceOf[FeatureId] - val data = points.asScala.foldLeft(AFiDataDateCount.empty) { (z, point) => - // extract year from acq_date column is YYYY-MM-DD - val acqDate = point.getUserData.asInstanceOf[String].split("\t")(2) - val alertDate = LocalDate.parse(acqDate) - z.merge(AFiDataDateCount.fillDaily(Some(alertDate), 1)) - } - (fid, data) - } - .reduceByKey(_ merge _) } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala index 79076f64..7c54f9b0 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala @@ -4,37 +4,14 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, SparkSession} import org.globalforestwatch.features.{CombinedFeatureId, FeatureId, GadmFeatureId, GfwProFeatureId} import org.globalforestwatch.summarystats._ -import cats.data.Validated.{Valid, Invalid} +import cats.data.Validated.{Invalid, Valid} +import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticData object AFiDF extends SummaryDF { case class RowGadmId(list_id: String, location_id: String, gadm_id: String) - def getFeatureDataFrameFromVerifiedRdd( - dataRDD: RDD[ValidatedLocation[AFiData]], - spark: SparkSession - ): DataFrame = { - import spark.implicits._ - - val rowId: FeatureId => RowGadmId = { - case CombinedFeatureId(proId: GfwProFeatureId, 
gadmId: GadmFeatureId) => - RowGadmId(proId.listId, proId.locationId.toString, gadmId.toString()) - case proId: GfwProFeatureId => - RowGadmId(proId.listId, proId.locationId.toString, "none") - case _ => - throw new IllegalArgumentException("Not a CombinedFeatureId[GfwProFeatureId, GadmFeatureId]") - } - dataRDD.map { - case Valid(Location(id, data)) => - (rowId(id), SummaryDF.RowError.empty, data) - case Invalid(Location(id, err)) => - (rowId(id), SummaryDF.RowError.fromJobError(err), AFiData.empty) - } - .toDF("id", "error", "data") - .select($"id.*", $"error.*", $"data.*") - } - def getFeatureDataFrame( - dataRDD: RDD[(FeatureId, ValidatedRow[AFiData])], + dataRDD: RDD[ValidatedLocation[AFiData]], spark: SparkSession ): DataFrame = { import spark.implicits._ diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala index d4fc99c9..5a7807bb 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala @@ -13,10 +13,9 @@ case class AFiGridSources(gridTile: GridTile, kwargs: Map[String, Any]) extends val treeCoverLoss: TreeCoverLoss = TreeCoverLoss(gridTile, kwargs) def readWindow( - windowKey: SpatialKey, - windowLayout: LayoutDefinition - ): Either[Throwable, Raster[AFiTile]] = { - + windowKey: SpatialKey, + windowLayout: LayoutDefinition + ): Either[Throwable, Raster[AFiTile]] = { for { lossTile <- Either.catchNonFatal(treeCoverLoss.fetchWindow(windowKey, windowLayout)).right } yield { From ce68aa741bfff88e6ea8ab9c099debc5b9234e67 Mon Sep 17 00:00:00 2001 From: manukala6 Date: Wed, 5 Jul 2023 11:00:01 -0700 Subject: [PATCH 04/33] Add GADM rasters --- src/main/resources/raster-catalog-pro.json | 12 ++++++++++++ .../globalforestwatch/layers/GADMadm0.scala | 18 ++++++++++++++++++ .../globalforestwatch/layers/GADMadm1.scala | 18 ++++++++++++++++++ 
.../globalforestwatch/layers/GADMadm2.scala | 18 ++++++++++++++++++ 4 files changed, 66 insertions(+) create mode 100644 src/main/scala/org/globalforestwatch/layers/GADMadm0.scala create mode 100644 src/main/scala/org/globalforestwatch/layers/GADMadm1.scala create mode 100644 src/main/scala/org/globalforestwatch/layers/GADMadm2.scala diff --git a/src/main/resources/raster-catalog-pro.json b/src/main/resources/raster-catalog-pro.json index 8d93add9..0e250c12 100644 --- a/src/main/resources/raster-catalog-pro.json +++ b/src/main/resources/raster-catalog-pro.json @@ -295,6 +295,18 @@ { "name": "arg_native_forest_land_plan", "source_uri": "s3://gfw-data-lake/arg_native_forest_land_plan/v202212/raster/epsg-4326/{grid_size}/{row_count}/category/gdal-geotiff/{tile_id}.tif" + }, + { + "name": "gadm_adm0", + "source_uri": "s3://gfw-data-lake/gadm_administrative_boundaries/v3.6/raster/epsg-4326/{grid_size}/{row_count}/adm0/geotiff/{tile_id}.tif" + }, + { + "name": "gadm_adm1", + "source_uri": "s3://gfw-data-lake/gadm_administrative_boundaries/v3.6/raster/epsg-4326/{grid_size}/{row_count}/adm1/geotiff/{tile_id}.tif" + }, + { + "name": "gadm_adm2", + "source_uri": "s3://gfw-data-lake/gadm_administrative_boundaries/v3.6/raster/epsg-4326/{grid_size}/{row_count}/adm2/geotiff/{tile_id}.tif" } ] } \ No newline at end of file diff --git a/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala b/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala new file mode 100644 index 00000000..549cfb35 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala @@ -0,0 +1,18 @@ +package org.globalforestwatch.layers + +import org.globalforestwatch.grids.GridTile + +case class GADMadm0(gridTile: GridTile, kwargs: Map[String, Any]) + extends IntegerLayer + with OptionalILayer { + + val datasetName: String = "gadm_adm0" + val uri: String = + uriForGrid(gridTile, kwargs) + + override def lookup(value: Int): Integer = + if (value == 9999) null else value + + + } + \ No 
newline at end of file diff --git a/src/main/scala/org/globalforestwatch/layers/GADMadm1.scala b/src/main/scala/org/globalforestwatch/layers/GADMadm1.scala new file mode 100644 index 00000000..703581d2 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/layers/GADMadm1.scala @@ -0,0 +1,18 @@ +package org.globalforestwatch.layers + +import org.globalforestwatch.grids.GridTile + +case class GADMadm1(gridTile: GridTile, kwargs: Map[String, Any]) + extends IntegerLayer + with OptionalILayer { + + val datasetName: String = "gadm_adm1" + val uri: String = + uriForGrid(gridTile, kwargs) + + override def lookup(value: Int): Integer = + if (value == 9999) null else value + + + } + \ No newline at end of file diff --git a/src/main/scala/org/globalforestwatch/layers/GADMadm2.scala b/src/main/scala/org/globalforestwatch/layers/GADMadm2.scala new file mode 100644 index 00000000..da938fb3 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/layers/GADMadm2.scala @@ -0,0 +1,18 @@ +package org.globalforestwatch.layers + +import org.globalforestwatch.grids.GridTile + +case class GADMadm2(gridTile: GridTile, kwargs: Map[String, Any]) + extends IntegerLayer + with OptionalILayer { + + val datasetName: String = "gadm_adm2" + val uri: String = + uriForGrid(gridTile, kwargs) + + override def lookup(value: Int): Integer = + if (value == 9999) null else value + + + } + \ No newline at end of file From c40215d0ea1c7573f224beb8538a652e936cb004 Mon Sep 17 00:00:00 2001 From: manukala6 Date: Fri, 21 Jul 2023 13:29:00 -0400 Subject: [PATCH 05/33] GTC-2436 Add SBTN Natural Forests --- src/main/resources/raster-catalog-pro.json | 4 ++++ .../layers/SBTNNaturalForests.scala | 20 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 src/main/scala/org/globalforestwatch/layers/SBTNNaturalForests.scala diff --git a/src/main/resources/raster-catalog-pro.json b/src/main/resources/raster-catalog-pro.json index 0e250c12..5ff2db82 100644 --- 
a/src/main/resources/raster-catalog-pro.json +++ b/src/main/resources/raster-catalog-pro.json @@ -307,6 +307,10 @@ { "name": "gadm_adm2", "source_uri": "s3://gfw-data-lake/gadm_administrative_boundaries/v3.6/raster/epsg-4326/{grid_size}/{row_count}/adm2/geotiff/{tile_id}.tif" + }, + { + "name": "sbtn_natural_forests_map", + "source_uri": "s3://gfw-data-lake/sbtn_natural_forests_map/v202305/raster/epsg-4326/{grid_size}/{row_count}/class/gdal-geotiff/{tile_id}.tif" } ] } \ No newline at end of file diff --git a/src/main/scala/org/globalforestwatch/layers/SBTNNaturalForests.scala b/src/main/scala/org/globalforestwatch/layers/SBTNNaturalForests.scala new file mode 100644 index 00000000..82bc3b24 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/layers/SBTNNaturalForests.scala @@ -0,0 +1,20 @@ +package org.globalforestwatch.layers + +import org.globalforestwatch.grids.GridTile + +case class SBTNNaturalForests(gridTile: GridTile, kwargs: Map[String, Any]) + extends StringLayer + with OptionalILayer { + + val datasetName = "sbtn_natural_forests_map" + val uri: String = uriForGrid(gridTile, kwargs) + + override val externalNoDataValue = "Unknown" + + def lookup(value: Int): String = value match { + case 0 => "Non-Forest" + case 1 => "Natural Forest" + case 2 => "Non-Natural Forest" + case _ => "Unknown" + } +} \ No newline at end of file From a515a685ac625b1ae33182e2c810223ff3b62928 Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Mon, 31 Jul 2023 14:30:52 -0700 Subject: [PATCH 06/33] Make analysis run --- .../summarystats/SummaryMain.scala | 4 +- .../summarystats/afi/AFiAnalysis.scala | 13 ++- .../summarystats/afi/AFiDF.scala | 37 +++++--- .../summarystats/afi/AFiData.scala | 35 ++----- .../summarystats/afi/AFiDataDateCount.scala | 73 -------------- .../summarystats/afi/AFiDataDouble.scala | 19 ++++ .../summarystats/afi/AFiDataLossYearly.scala | 94 +++++++++++++++++++ .../summarystats/afi/AFiDataParser.scala | 13 +++ .../summarystats/afi/AFiRawData.scala | 4 +- 
.../summarystats/afi/AFiRawDataGroup.scala | 12 +-- .../summarystats/afi/AFiSummary.scala | 27 ++++-- .../ForestChangeDiagnosticDataDouble.scala | 1 + .../GfwProDashboardSummary.scala | 3 +- 13 files changed, 193 insertions(+), 142 deletions(-) delete mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDateCount.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDouble.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearly.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataParser.scala diff --git a/src/main/scala/org/globalforestwatch/summarystats/SummaryMain.scala b/src/main/scala/org/globalforestwatch/summarystats/SummaryMain.scala index 0d50a3d3..15503712 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/SummaryMain.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/SummaryMain.scala @@ -9,6 +9,7 @@ import org.globalforestwatch.summarystats.gfwpro_dashboard.GfwProDashboardComman import org.globalforestwatch.summarystats.gladalerts.GladAlertsCommand.gladAlertsCommand import org.globalforestwatch.summarystats.treecoverloss.TreeCoverLossCommand.treeCoverLossCommand import org.globalforestwatch.summarystats.integrated_alerts.IntegratedAlertsCommand.integratedAlertsCommand +import org.globalforestwatch.summarystats.afi.AFiCommand.afiCommand import com.monovore.decline._ import org.globalforestwatch.config.GfwConfig @@ -25,7 +26,8 @@ object SummaryMain { gfwProDashboardCommand orElse gladAlertsCommand orElse treeCoverLossCommand orElse - integratedAlertsCommand + integratedAlertsCommand orElse + afiCommand } val command = Command(name, header, true)(main) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index 9dbfbed4..e05c8004 100644 --- 
a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -18,7 +18,7 @@ import org.apache.spark.storage.StorageLevel object AFiAnalysis extends SummaryAnalysis { - val name = "aafi" + val name = "afi" def apply( featureRDD: RDD[ValidatedLocation[Geometry]], @@ -34,10 +34,17 @@ object AFiAnalysis extends SummaryAnalysis { case Validated.Invalid(Location(id, geom: Geometry)) => Feature(geom, id) } - val summaryRDD = AFiRDD(validatedRDD, AFiGrid.blockTileGrid, kwargs) + val summaryRDD: RDD[ValidatedLocation[AFiSummary]] = AFiRDD(validatedRDD, AFiGrid.blockTileGrid, kwargs) + val dataRDD: RDD[ValidatedLocation[AFiData]] = ValidatedWorkflow(summaryRDD).mapValid { summaries => + summaries + .mapValues { + case summary: AFiSummary => summary.toAFiData() + } + }.unify + // TODO somehow convert AFiSummary to AFiData - val summaryDF = AFiDF.getFeatureDataFrame(summaryRDD, spark) + val summaryDF = AFiDF.getFeatureDataFrame(dataRDD, spark) val runOutputUrl: String = getOutputUrl(kwargs) AFiExport.export(featureType, summaryDF, runOutputUrl, kwargs) } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala index 7c54f9b0..bd887560 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala @@ -2,10 +2,11 @@ package org.globalforestwatch.summarystats.afi import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, SparkSession} -import org.globalforestwatch.features.{CombinedFeatureId, FeatureId, GadmFeatureId, GfwProFeatureId} +import org.globalforestwatch.features.{CombinedFeatureId, FeatureId, GadmFeatureId, GfwProFeatureId, WdpaFeatureId} import org.globalforestwatch.summarystats._ import cats.data.Validated.{Invalid, Valid} -import 
org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticData +import org.globalforestwatch.summarystats.SummaryDF.{RowError, RowId} +import org.globalforestwatch.util.Util.fieldsFromCol object AFiDF extends SummaryDF { case class RowGadmId(list_id: String, location_id: String, gadm_id: String) @@ -16,19 +17,25 @@ object AFiDF extends SummaryDF { ): DataFrame = { import spark.implicits._ - dataRDD.mapValues { - case Valid(data) => - (SummaryDF.RowError.empty, data) - case Invalid(err) => - (SummaryDF.RowError.fromJobError(err), AFiData.empty) - }.map { - case (CombinedFeatureId(proId: GfwProFeatureId, gadmId: GadmFeatureId), (error, data)) => - val rowId = RowGadmId(proId.listId, proId.locationId.toString, gadmId.toString()) - (rowId, error, data) - case _ => - throw new IllegalArgumentException("Not a CombinedFeatureId[GfwProFeatureId, GadmFeatureId]") + val rowId: FeatureId => RowId = { + case gfwproId: GfwProFeatureId => + RowId(gfwproId.listId, gfwproId.locationId.toString) + case gadmId: GadmFeatureId => + RowId("GADM 3.6", gadmId.toString) + case wdpaId: WdpaFeatureId => + RowId("WDPA", wdpaId.toString) + case id => + throw new IllegalArgumentException(s"Can't produce DataFrame for $id") } - .toDF("id", "error", "data") - .select($"id.*", $"error.*", $"data.*") + + dataRDD + .map { + case Valid(Location(fid, data)) => + (rowId(fid), RowError.empty, data) + case Invalid(Location(fid, err)) => + (rowId(fid), RowError.fromJobError(err), AFiData.empty) + } + .toDF("id", "error", "data") + .select($"id.*", $"error.*", $"data.*") } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala index 8fde5d38..9a33c6cd 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala @@ -1,7 +1,9 @@ package org.globalforestwatch.summarystats.afi import 
cats.Semigroup -import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataDouble +import frameless.TypedEncoder.bigDecimalEncoder + +import scala.collection.immutable.SortedMap import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder /** Summary data per class @@ -9,28 +11,12 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder * Note: This case class contains mutable values */ case class AFiData( - /** Location intersects GLAD Alert tiles, GLAD alerts are possible */ - glad_alerts_coverage: Boolean, - /** How many hacters of location geometry had tree cover extent > 30% in 2000 */ - tree_cover_extent_total: ForestChangeDiagnosticDataDouble, - /** GLAD alert count within location geometry grouped by day */ - glad_alerts_daily: AFiDataDateCount, - /** GLAD alert count within location geometry grouped by ISO year-week */ - glad_alerts_weekly: AFiDataDateCount, - /** GLAD alert count within location geometry grouped by year-month */ - glad_alerts_monthly: AFiDataDateCount, - /** VIIRS alerts for location geometry grouped by day */ - viirs_alerts_daily: AFiDataDateCount, + treeCoverLoss: AFiDataLossYearly ) { def merge(other: AFiData): AFiData = { AFiData( - glad_alerts_coverage || other.glad_alerts_coverage, - tree_cover_extent_total.merge(other.tree_cover_extent_total), - glad_alerts_daily.merge(other.glad_alerts_daily), - glad_alerts_weekly.merge(other.glad_alerts_weekly), - glad_alerts_monthly.merge(other.glad_alerts_monthly), - viirs_alerts_daily.merge(other.viirs_alerts_daily) + treeCoverLoss.merge(other.treeCoverLoss) ) } } @@ -38,16 +24,9 @@ case class AFiData( object AFiData { def empty: AFiData = - AFiData( - glad_alerts_coverage = false, - tree_cover_extent_total = ForestChangeDiagnosticDataDouble.empty, - AFiDataDateCount.empty, - AFiDataDateCount.empty, - AFiDataDateCount.empty, - AFiDataDateCount.empty - ) + AFiData(AFiDataLossYearly.empty) - implicit val gfwProDashboardDataSemigroup: 
Semigroup[AFiData] = + implicit val afiDataSemigroup: Semigroup[AFiData] = new Semigroup[AFiData] { def combine(x: AFiData, y: AFiData): AFiData = x.merge(y) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDateCount.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDateCount.scala deleted file mode 100644 index 37480afc..00000000 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDateCount.scala +++ /dev/null @@ -1,73 +0,0 @@ -package org.globalforestwatch.summarystats.afi - -import io.circe.syntax._ - -import scala.collection.immutable.SortedMap -import frameless.Injection -import cats.implicits._ -import java.time.LocalDate -import java.time.format._ -import java.time.temporal._ - -case class AFiDataDateCount(value: SortedMap[String, Int]) { - - def merge(other: AFiDataDateCount): AFiDataDateCount = { - AFiDataDateCount(value combine other.value) - } - - def toJson: String = this.value.asJson.noSpaces -} - -object AFiDataDateCount { - - /** ex: 2016-1-1 => 2015-53 because the 1st of 2016 is Friday of the last week of 2015 */ - val WeekOfYear = - new DateTimeFormatterBuilder() - .parseCaseInsensitive() - .appendValue(IsoFields.WEEK_BASED_YEAR, 4, 10, SignStyle.EXCEEDS_PAD) - .appendLiteral("-") - .appendValue(IsoFields.WEEK_OF_WEEK_BASED_YEAR, 2) - .toFormatter(java.util.Locale.US); - - val MonthOfYear = - new DateTimeFormatterBuilder() - .parseCaseInsensitive() - .appendValue(ChronoField.YEAR, 4, 10, SignStyle.EXCEEDS_PAD) - .appendLiteral("-") - .appendValue(ChronoField.MONTH_OF_YEAR, 2) - .toFormatter(java.util.Locale.US); - - implicit def injection: Injection[AFiDataDateCount, String] = Injection(_.toJson, fromString) - - def empty: AFiDataDateCount = AFiDataDateCount(SortedMap()) - - def fillDaily(alertDate: Option[LocalDate], alertCount: Int): AFiDataDateCount = - fill(alertDate, alertCount, _.format(DateTimeFormatter.ISO_DATE)) - - def fillWeekly(alertDate: Option[LocalDate], alertCount: Int): 
AFiDataDateCount = - fill(alertDate, alertCount, _.format(WeekOfYear)) - - def fillMonthly(alertDate: Option[LocalDate], alertCount: Int): AFiDataDateCount = - fill(alertDate, alertCount, _.format(MonthOfYear)) - - def fill( - alertDate: Option[LocalDate], - alertCount: Int, - formatter: LocalDate => String - ): AFiDataDateCount = { - - alertDate match { - case Some(date) => - val dateKey: String = formatter(date) - AFiDataDateCount(SortedMap(dateKey -> alertCount)) - - case _ => - this.empty - } - } - - def fromString(value: String): AFiDataDateCount = { - val sortedMap = io.circe.parser.decode[SortedMap[String, Int]](value) - AFiDataDateCount(sortedMap.getOrElse(SortedMap())) - } -} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDouble.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDouble.scala new file mode 100644 index 00000000..9a1a5efc --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDouble.scala @@ -0,0 +1,19 @@ +package org.globalforestwatch.summarystats.afi + +import io.circe.syntax._ + +case class AFiDataDouble(value: Double) extends AFiDataParser[AFiDataDouble] { + def merge( + other: AFiDataDouble + ): AFiDataDouble = { + AFiDataDouble(value + other.value) + } + + def round: Double = this.round(value) + + def toJson: String = { + this.round.asJson.noSpaces + } +} + + diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearly.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearly.scala new file mode 100644 index 00000000..505d9b4b --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearly.scala @@ -0,0 +1,94 @@ +package org.globalforestwatch.summarystats.afi + +import cats.implicits._ +import cats.kernel.Semigroup +import frameless.Injection +import io.circe.syntax._ +import scala.collection.immutable.SortedMap +import io.circe.syntax._ +import io.circe.parser.decode +import 
cats.kernel.Semigroup +import cats.implicits._ + +import scala.collection.immutable.SortedMap + +case class AFiDataLossYearly(value: SortedMap[Int, Double]) + extends AFiDataParser[AFiDataLossYearly] { + + def merge(other: AFiDataLossYearly): AFiDataLossYearly = { + AFiDataLossYearly(Semigroup[SortedMap[Int, Double]].combine(value, other.value)) + } + + def round: SortedMap[Int, Double] = this.value.map { case (key, value) => key -> this.round(value) } + + def limitToMaxYear(maxYear: Int): AFiDataLossYearly = { + AFiDataLossYearly(value.filterKeys{ year => year <= maxYear }) + } + + def toJson: String = { + this.round.asJson.noSpaces + } +} + +object AFiDataLossYearly { + def empty: AFiDataLossYearly = + AFiDataLossYearly( + SortedMap() + ) + + def prefilled: AFiDataLossYearly = + AFiDataLossYearly( + SortedMap( + 2001 -> 0, + 2002 -> 0, + 2003 -> 0, + 2004 -> 0, + 2005 -> 0, + 2006 -> 0, + 2007 -> 0, + 2008 -> 0, + 2009 -> 0, + 2010 -> 0, + 2011 -> 0, + 2012 -> 0, + 2013 -> 0, + 2014 -> 0, + 2015 -> 0, + 2016 -> 0, + 2017 -> 0, + 2018 -> 0, + 2019 -> 0, + 2020 -> 0, + 2021 -> 0, + ) + ) + + def fill(lossYear: Int, + areaHa: Double, + include: Boolean = true): AFiDataLossYearly = { + + // Only except lossYear values within range of default map + val minLossYear: Int = this.prefilled.value.keysIterator.min + val maxLossYear: Int = this.prefilled.value.keysIterator.max + + if (minLossYear <= lossYear && lossYear <= maxLossYear && include) { + AFiDataLossYearly.prefilled.merge( + AFiDataLossYearly( + SortedMap( + lossYear -> areaHa + ) + ) + ) + } else + this.empty + } + + def fromString(value: String): AFiDataLossYearly = { + val sortedMap = decode[SortedMap[Int, Double]](value) + AFiDataLossYearly(sortedMap.getOrElse(SortedMap())) + } + + implicit def injection: Injection[AFiDataLossYearly, String] = Injection(_.toJson, fromString) +} + + diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataParser.scala 
b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataParser.scala new file mode 100644 index 00000000..bcf80834 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataParser.scala @@ -0,0 +1,13 @@ +package org.globalforestwatch.summarystats.afi + +trait AFiDataParser[Self <: AFiDataParser[Self]] { + val value: Any + + def merge(other: Self): Self + + def toJson: String + + protected def round(value: Double, digits: Int = 4): Double = { + Math.round(value * math.pow(10, digits)) / math.pow(10, digits) + } +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala index 7d5a0fce..b50b9075 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala @@ -6,9 +6,9 @@ import cats.Semigroup * * Note: This case class contains mutable values */ -case class AFiRawData(var lossArea: Double) { +case class AFiRawData(var treeCoverLossArea: Double) { def merge(other: AFiRawData): AFiRawData = { - AFiRawData(lossArea + other.lossArea) + AFiRawData(treeCoverLossArea + other.treeCoverLossArea) } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala index f11805ff..0db7587f 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala @@ -4,15 +4,9 @@ import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeD import java.time.LocalDate case class AFiRawDataGroup( - treeCoverLoss: Integer + treeCoverLossYear: Int ) { - def toAFiData(alertCount: Int, totalArea: Double): AFiData = { - AFiData( - glad_alerts_coverage = gladAlertsCoverage, - glad_alerts_daily = AFiDataDateCount.fillDaily(alertDate, alertCount), - 
glad_alerts_weekly = AFiDataDateCount.fillWeekly(alertDate, alertCount), - glad_alerts_monthly = AFiDataDateCount.fillMonthly(alertDate, alertCount), - viirs_alerts_daily = AFiDataDateCount.empty, - tree_cover_extent_total = ForestChangeDiagnosticDataDouble.fill(totalArea)) + def toAFiData(treeCoverLossArea: Double): AFiData = { + AFiData(AFiDataLossYearly.fill(treeCoverLossYear, treeCoverLossArea)) } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala index 281f38d7..4fc0d922 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala @@ -22,7 +22,7 @@ case class AFiSummary( def toAFiData(): AFiData = { stats - .map { case (group, data) => group.toAFiData(data.alertCount, data.treeCoverExtentArea) } + .map { case (group, data) => group.toAFiData(data.treeCoverLossArea) } .foldLeft(AFiData.empty)( _ merge _) } } @@ -39,15 +39,22 @@ object AFiSummary { def visit(raster: Raster[AFiTile], col: Int, row: Int): Unit = { val lossYear: Integer = raster.tile.treeCoverLoss.getData(col, row) - val iso = ... - val adm1 = ... - val adm2: Integer = ... - - val groupKey = AFiRawDataGroup(iso, adm1, adm2, lossYear) - val area = ... - - val summaryData = acc.stats.getOrElse(groupKey, AFiRawData(lossArea = 0)) - summaryData.lossArea += area +// val iso = ... +// val adm1 = ... +// val adm2: Integer = ... 
+// +// val groupKey = AFiRawDataGroup(iso, adm1, adm2, lossYear) + // pixel Area + val lat: Double = raster.rasterExtent.gridRowToMap(row) + val area: Double = Geodesy.pixelArea( + lat, + raster.cellSize + ) + val areaHa = area / 10000.0 + + val groupKey = AFiRawDataGroup(lossYear) + val summaryData = acc.stats.getOrElse(groupKey, AFiRawData(treeCoverLossArea = 0)) + summaryData.treeCoverLossArea += areaHa val new_stats = acc.stats.updated(groupKey, summaryData) acc = AFiSummary(new_stats) diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataDouble.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataDouble.scala index 2c7e3369..dda4cb4c 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataDouble.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataDouble.scala @@ -3,6 +3,7 @@ import frameless.Injection import org.globalforestwatch.util.Implicits._ import io.circe.syntax._ + case class ForestChangeDiagnosticDataDouble(value: Double) extends ForestChangeDiagnosticDataParser[ForestChangeDiagnosticDataDouble] { def merge( other: ForestChangeDiagnosticDataDouble diff --git a/src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardSummary.scala b/src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardSummary.scala index a41e9cbc..53abdfab 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardSummary.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/gfwpro_dashboard/GfwProDashboardSummary.scala @@ -22,7 +22,8 @@ case class GfwProDashboardSummary( def toGfwProDashboardData(): GfwProDashboardData = { stats - .map { case (group, data) => group.toGfwProDashboardData(data.alertCount, data.treeCoverExtentArea) } + .map { case (group, 
data) => group. + toGfwProDashboardData(data.alertCount, data.treeCoverExtentArea) } .foldLeft(GfwProDashboardData.empty)( _ merge _) } } From a93ac897471634df812edaab76e45b9b848cba8d Mon Sep 17 00:00:00 2001 From: manukala6 Date: Tue, 1 Aug 2023 09:59:29 -0700 Subject: [PATCH 07/33] GTC-2436 Add negligible risk --- .../layers/NegligibleRisk.scala | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 src/main/scala/org/globalforestwatch/layers/NegligibleRisk.scala diff --git a/src/main/scala/org/globalforestwatch/layers/NegligibleRisk.scala b/src/main/scala/org/globalforestwatch/layers/NegligibleRisk.scala new file mode 100644 index 00000000..bf4ae58e --- /dev/null +++ b/src/main/scala/org/globalforestwatch/layers/NegligibleRisk.scala @@ -0,0 +1,21 @@ +package org.globalforestwatch.layers + +import org.globalforestwatch.grids.GridTile + +case class NegligibleRisk(gridTile: GridTile, kwargs: Map[String, Any]) + extends StringLayer + with OptionalILayer { + + val datasetName = "gfwpro_negligible_risk" + val uri: String = + uriForGrid(gridTile, kwargs) + + override val externalNoDataValue = "Unknown" + + def lookup(value: Int): String = value match { + case 0 => "NO" + case 1 => "YES" + case 2 => "NA" + case _ => "Unknown" + } +} From 60bfc61aaceced083ed2f488cb1a10ef73d7a0b7 Mon Sep 17 00:00:00 2001 From: manukala6 Date: Thu, 3 Aug 2023 18:06:38 -0700 Subject: [PATCH 08/33] GTC-2437 define afi data structures --- .../summarystats/afi/AFiData.scala | 27 +++++-- .../summarystats/afi/AFiDataDouble.scala | 17 ++++- .../afi/AFiDataDoubleCategory.scala | 58 +++++++++++++++ .../summarystats/afi/AFiDataLossYearly.scala | 3 - .../afi/AFiDataLossYearlyCategory.scala | 74 +++++++++++++++++++ .../summarystats/afi/AFiGridSources.scala | 12 ++- .../summarystats/afi/AFiTile.scala | 2 + 7 files changed, 181 insertions(+), 12 deletions(-) create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDoubleCategory.scala create mode 100644 
src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearlyCategory.scala diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala index 9a33c6cd..a9a3a5e9 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala @@ -1,9 +1,10 @@ package org.globalforestwatch.summarystats.afi -import cats.Semigroup -import frameless.TypedEncoder.bigDecimalEncoder +import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataLossYearly +import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataLossYearlyCategory +import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataDoubleCategory -import scala.collection.immutable.SortedMap +import cats.Semigroup import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder /** Summary data per class @@ -11,12 +12,19 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder * Note: This case class contains mutable values */ case class AFiData( - treeCoverLoss: AFiDataLossYearly + /** Annual Tree Cover Loss TCD 30 within location geometry */ + tree_cover_loss_total_yearly: ForestChangeDiagnosticDataLossYearly, + /** Annual Tree Cover Loss on Natural Forest pixels within location geometry */ + tree_cover_loss_natural_forest_yearly: ForestChangeDiagnosticDataLossYearlyCategory, + /** Natural Forest extent within location geometry */ + natural_forest_extent: ForestChangeDiagnosticDataDoubleCategory ) { def merge(other: AFiData): AFiData = { AFiData( - treeCoverLoss.merge(other.treeCoverLoss) + tree_cover_loss_total_yearly.merge(other.tree_cover_loss_total_yearly), + tree_cover_loss_natural_forest_yearly.merge(other.tree_cover_loss_natural_forest_yearly), + natural_forest_extent.merge(other.natural_forest_extent) ) } } @@ -24,7 +32,11 @@ 
case class AFiData( object AFiData { def empty: AFiData = - AFiData(AFiDataLossYearly.empty) + AFiData( + ForestChangeDiagnosticDataLossYearly.empty, + ForestChangeDiagnosticDataLossYearlyCategory.empty, + ForestChangeDiagnosticDataDoubleCategory.empty + ) implicit val afiDataSemigroup: Semigroup[AFiData] = new Semigroup[AFiData] { @@ -33,6 +45,7 @@ object AFiData { } implicit def dataExpressionEncoder: ExpressionEncoder[AFiData] = - frameless.TypedExpressionEncoder[AFiData] + frameless + .TypedExpressionEncoder[AFiData] .asInstanceOf[ExpressionEncoder[AFiData]] } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDouble.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDouble.scala index 9a1a5efc..7e37ca62 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDouble.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDouble.scala @@ -1,5 +1,6 @@ package org.globalforestwatch.summarystats.afi - +import frameless.Injection +import org.globalforestwatch.util.Implicits._ import io.circe.syntax._ case class AFiDataDouble(value: Double) extends AFiDataParser[AFiDataDouble] { @@ -16,4 +17,18 @@ case class AFiDataDouble(value: Double) extends AFiDataParser[AFiDataDouble] { } } +object AFiDataDouble { + def empty: AFiDataDouble = + AFiDataDouble(0) + + def fill(value: Double, + include: Boolean = true): AFiDataDouble = { + AFiDataDouble(value * include) + } + + implicit def injection: Injection[AFiDataDouble, String] = + Injection(_.toJson, s => AFiDataDouble(s.toDouble)) + +} + diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDoubleCategory.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDoubleCategory.scala new file mode 100644 index 00000000..0b14b44f --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDoubleCategory.scala @@ -0,0 +1,58 @@ +package org.globalforestwatch.summarystats.afi + +import 
frameless.Injection +import io.circe.syntax._ +import io.circe.parser.decode + +case class AFiDataDoubleCategory(value: Map[String, AFiDataDouble]) extends AFiDataParser[AFiDataDoubleCategory] { + def merge( + other: AFiDataDoubleCategory + ): AFiDataDoubleCategory = { + + AFiDataDoubleCategory(value ++ other.value.map { + case (key, otherValue) => + key -> value + .getOrElse(key, AFiDataDouble.empty) + .merge(otherValue) + }) + } + + def toJson: String = { + this.value + .map { + case (key, value) => + key -> value.round + } + .asJson + .noSpaces + } +} + +object AFiDataDoubleCategory { + def empty: AFiDataDoubleCategory = + AFiDataDoubleCategory(Map()) + + def fill( + className: String, + areaHa: Double, + noData: List[String] = List("", "Unknown", "Not applicable"), + include: Boolean = true + ): AFiDataDoubleCategory = { + if (noData.contains(className)) + AFiDataDoubleCategory.empty + else + AFiDataDoubleCategory( + Map(className -> AFiDataDouble.fill(areaHa, include)) + ) + } + + def fromString(value: String): AFiDataDoubleCategory = { + + val categories: Map[String, String] = decode[Map[String, String]](value).getOrElse(Map()) + val newValue: Map[String, AFiDataDouble] = categories.map { case (k, v) => (k, AFiDataDouble(v.toDouble)) } + AFiDataDoubleCategory(newValue) + + } + + implicit def injection: Injection[AFiDataDoubleCategory , String] = Injection(_.toJson, fromString) +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearly.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearly.scala index 505d9b4b..de5164ff 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearly.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearly.scala @@ -3,12 +3,9 @@ package org.globalforestwatch.summarystats.afi import cats.implicits._ import cats.kernel.Semigroup import frameless.Injection -import io.circe.syntax._ import scala.collection.immutable.SortedMap 
import io.circe.syntax._ import io.circe.parser.decode -import cats.kernel.Semigroup -import cats.implicits._ import scala.collection.immutable.SortedMap diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearlyCategory.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearlyCategory.scala new file mode 100644 index 00000000..345b2ce0 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearlyCategory.scala @@ -0,0 +1,74 @@ +package org.globalforestwatch.summarystats.afi + +import frameless.Injection +import io.circe.syntax._ +import io.circe.parser.decode + + +case class AFiDataLossYearlyCategory( + value: Map[String, AFiDataLossYearly] + ) extends AFiDataParser[ + AFiDataLossYearlyCategory +] { + def merge( + other: AFiDataLossYearlyCategory + ): AFiDataLossYearlyCategory = { + + AFiDataLossYearlyCategory(value ++ other.value.map { + case (key, otherValue) => + key -> value + .getOrElse(key, AFiDataLossYearly.empty) + .merge(otherValue) + }) + } + + def toJson: String = { + this.value + .map { + case (key, value) => + key -> value.round + } + .asJson + .noSpaces + } +} + +object AFiDataLossYearlyCategory { + def empty: AFiDataLossYearlyCategory = + AFiDataLossYearlyCategory(Map()) + + def fill( + className: String, + lossYear: Int, + areaHa: Double, + noData: List[String] = List("", "Unknown", "Not applicable"), + include: Boolean = true + ): AFiDataLossYearlyCategory = { + + if (noData.contains(className)) + AFiDataLossYearlyCategory.empty + else + AFiDataLossYearlyCategory( + Map( + className -> AFiDataLossYearly + .fill(lossYear, areaHa, include) + ) + ) + } + + def fromString( + value: String + ): AFiDataLossYearlyCategory = { + + val categories: Map[String, String] = + decode[Map[String, String]](value).getOrElse(Map()) + val newValues = categories.map { + case (k, v) => (k, AFiDataLossYearly.fromString(v)) + } + + AFiDataLossYearlyCategory(newValues) + + } + + implicit def 
injection: Injection[AFiDataLossYearlyCategory, String] = Injection(_.toJson, fromString) +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala index 5a7807bb..2fb4d5d6 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala @@ -11,6 +11,8 @@ import org.globalforestwatch.layers._ */ case class AFiGridSources(gridTile: GridTile, kwargs: Map[String, Any]) extends GridSources { val treeCoverLoss: TreeCoverLoss = TreeCoverLoss(gridTile, kwargs) + val sbtnNaturalForest: SBTNNaturalForests = SBTNNaturalForests(gridTile, kwargs) + val negligibleRisk: NegligibleRisk = NegligibleRisk(gridTile, kwargs) def readWindow( windowKey: SpatialKey, @@ -19,7 +21,15 @@ case class AFiGridSources(gridTile: GridTile, kwargs: Map[String, Any]) extends for { lossTile <- Either.catchNonFatal(treeCoverLoss.fetchWindow(windowKey, windowLayout)).right } yield { - val tile = AFiTile(lossTile) + + val sbtnNaturalForestTile = sbtnNaturalForest.fetchWindow(windowKey, windowLayout) + val negligibleRiskTile = negligibleRisk.fetchWindow(windowKey, windowLayout) + + val tile = AFiTile( + lossTile, + sbtnNaturalForestTile, + negligibleRiskTile + ) Raster(tile, windowKey.extent(windowLayout)) } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala index 285add67..6230fb5d 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala @@ -10,6 +10,8 @@ import org.globalforestwatch.layers._ */ case class AFiTile( treeCoverLoss: TreeCoverLoss#ITile, + sbtnNaturalForest: SBTNNaturalForests#OptionalITile, + negligibleRisk: NegligibleRisk#OptionalITile ) extends CellGrid[Int] { def cellType: 
CellType = treeCoverLoss.cellType From 710c32effb2ed6945b6a047f048016eb793abe85 Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Tue, 8 Aug 2023 15:53:02 -0700 Subject: [PATCH 09/33] Add negligible risk --- src/main/resources/raster-catalog-pro.json | 4 +++ .../layers/NegligibleRisk.scala | 10 +++---- .../summarystats/afi/AFiData.scala | 22 +++++++------- .../summarystats/afi/AFiDataBoolean.scala | 30 +++++++++++++++++++ .../summarystats/afi/AFiRawData.scala | 15 ++++++++-- .../summarystats/afi/AFiRawDataGroup.scala | 16 ++++++++-- 6 files changed, 76 insertions(+), 21 deletions(-) create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataBoolean.scala diff --git a/src/main/resources/raster-catalog-pro.json b/src/main/resources/raster-catalog-pro.json index 5ff2db82..4fe9dd45 100644 --- a/src/main/resources/raster-catalog-pro.json +++ b/src/main/resources/raster-catalog-pro.json @@ -311,6 +311,10 @@ { "name": "sbtn_natural_forests_map", "source_uri": "s3://gfw-data-lake/sbtn_natural_forests_map/v202305/raster/epsg-4326/{grid_size}/{row_count}/class/gdal-geotiff/{tile_id}.tif" + }, + { + "name": "gfwpro_negligible_risk_analysis", + "source_uri": "s3://gfw-data-lake/gfwpro_negligible_risk_analysis/v20230726/raster/epsg-4326/{grid_size}/{row_count}/negligible_risk/geotiff/{tile_id}.tif" } ] } \ No newline at end of file diff --git a/src/main/scala/org/globalforestwatch/layers/NegligibleRisk.scala b/src/main/scala/org/globalforestwatch/layers/NegligibleRisk.scala index bf4ae58e..aa3d1670 100644 --- a/src/main/scala/org/globalforestwatch/layers/NegligibleRisk.scala +++ b/src/main/scala/org/globalforestwatch/layers/NegligibleRisk.scala @@ -6,16 +6,14 @@ case class NegligibleRisk(gridTile: GridTile, kwargs: Map[String, Any]) extends StringLayer with OptionalILayer { - val datasetName = "gfwpro_negligible_risk" + val datasetName = "gfwpro_negligible_risk_analysis" val uri: String = uriForGrid(gridTile, kwargs) - override val externalNoDataValue = 
"Unknown" - def lookup(value: Int): String = value match { - case 0 => "NO" - case 1 => "YES" - case 2 => "NA" + case 1 => "NO" + case 2 => "YES" + case 3 => "NA" case _ => "Unknown" } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala index a9a3a5e9..2dd5a9a6 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala @@ -12,19 +12,21 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder * Note: This case class contains mutable values */ case class AFiData( - /** Annual Tree Cover Loss TCD 30 within location geometry */ - tree_cover_loss_total_yearly: ForestChangeDiagnosticDataLossYearly, /** Annual Tree Cover Loss on Natural Forest pixels within location geometry */ - tree_cover_loss_natural_forest_yearly: ForestChangeDiagnosticDataLossYearlyCategory, + tree_cover_loss_natural_forest_yearly: AFiDataLossYearly, /** Natural Forest extent within location geometry */ - natural_forest_extent: ForestChangeDiagnosticDataDoubleCategory + natural_forest_extent: AFiDataDouble, + is_negligible_risk: AFiDataBoolean, + negligible_risk_area: AFiDataDoubleCategory, + total_area: AFiDataDoubleCategory, ) { - def merge(other: AFiData): AFiData = { AFiData( - tree_cover_loss_total_yearly.merge(other.tree_cover_loss_total_yearly), tree_cover_loss_natural_forest_yearly.merge(other.tree_cover_loss_natural_forest_yearly), - natural_forest_extent.merge(other.natural_forest_extent) + natural_forest_extent.merge(other.natural_forest_extent), + is_negligible_risk.merge(other.is_negligible_risk), + negligible_risk_area.merge(other.negligible_risk_area), + total_area.merge(other.total_area) ) } } @@ -33,9 +35,9 @@ object AFiData { def empty: AFiData = AFiData( - ForestChangeDiagnosticDataLossYearly.empty, - ForestChangeDiagnosticDataLossYearlyCategory.empty, - 
ForestChangeDiagnosticDataDoubleCategory.empty + AFiDataLossYearly.empty, + AFiDataDouble.empty, + AFiDataBoolean.empty ) implicit val afiDataSemigroup: Semigroup[AFiData] = diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataBoolean.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataBoolean.scala new file mode 100644 index 00000000..57aa5d78 --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataBoolean.scala @@ -0,0 +1,30 @@ +package org.globalforestwatch.summarystats.afi + +import frameless.Injection +import io.circe.syntax._ + +case class AFiDataBoolean(value: Boolean) + extends AFiDataParser[AFiDataBoolean] { + def merge( + other: AFiDataBoolean + ): AFiDataBoolean = { + AFiDataBoolean((value || other.value)) + } + + def toJson: String = { + this.value.asJson.noSpaces + } +} + +object AFiDataBoolean { + def empty: AFiDataBoolean = + AFiDataBoolean(false) + + def fill(value: Boolean): AFiDataBoolean = { + AFiDataBoolean(value) + } + + implicit def injection: Injection[AFiDataBoolean, String] = + Injection(_.toJson, s => AFiDataBoolean(s.toBoolean)) +} + diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala index b50b9075..b573e1bf 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala @@ -6,9 +6,20 @@ import cats.Semigroup * * Note: This case class contains mutable values */ -case class AFiRawData(var treeCoverLossArea: Double) { +case class AFiRawData( + var treeCoverLossArea: Double, + var naturalLandExtent: Double, + var isNegligibleRisk: Boolean, + var negligibleRiskArea: Double, + var totalArea: Double + ) { def merge(other: AFiRawData): AFiRawData = { - AFiRawData(treeCoverLossArea + other.treeCoverLossArea) + AFiRawData( + treeCoverLossArea + other.treeCoverLossArea, + 
naturalLandExtent + other.naturalLandExtent, + isNegligibleRisk || other.isNegligibleRisk, + negligibleRiskArea + other.negligibleRiskArea, + totalArea + other.totalArea, } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala index 0db7587f..36a2dcb7 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala @@ -1,12 +1,22 @@ package org.globalforestwatch.summarystats.afi import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataDouble + import java.time.LocalDate case class AFiRawDataGroup( - treeCoverLossYear: Int + treeCoverLossYear: Int, + gadmId: String, + isNaturalLand: Boolean, + negligibleRisk: String, ) { - def toAFiData(treeCoverLossArea: Double): AFiData = { - AFiData(AFiDataLossYearly.fill(treeCoverLossYear, treeCoverLossArea)) + def toAFiData(totalArea: Double): AFiData = { + AFiData( + AFiDataLossYearly.fill(treeCoverLossYear, totalArea, isNaturalLand), + AFiDataDouble.fill(totalArea, isNaturalLand), + AFiDataBoolean.fill(negligibleRisk == "YES"), + AFiDataDoubleCategory.fill(gadmId, totalArea, include = negligibleRisk != "NA"), + AFiDataDoubleCategory.fill(gadmId, totalArea) + ) } } From a7c201718b7364804185430c75a3613db2b924e7 Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Tue, 8 Aug 2023 15:54:37 -0700 Subject: [PATCH 10/33] Fix raw data --- .../summarystats/afi/AFiRawData.scala | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala index b573e1bf..256509e9 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala @@ 
-6,20 +6,10 @@ import cats.Semigroup * * Note: This case class contains mutable values */ -case class AFiRawData( - var treeCoverLossArea: Double, - var naturalLandExtent: Double, - var isNegligibleRisk: Boolean, - var negligibleRiskArea: Double, - var totalArea: Double - ) { +case class AFiRawData(var totalArea: Double) { def merge(other: AFiRawData): AFiRawData = { AFiRawData( - treeCoverLossArea + other.treeCoverLossArea, - naturalLandExtent + other.naturalLandExtent, - isNegligibleRisk || other.isNegligibleRisk, - negligibleRiskArea + other.negligibleRiskArea, - totalArea + other.totalArea, + totalArea + other.totalArea } } From e757d017614fa4fabff7d7624a8ce728d265dae6 Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Tue, 8 Aug 2023 15:55:00 -0700 Subject: [PATCH 11/33] Fix raw data --- .../org/globalforestwatch/summarystats/afi/AFiRawData.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala index 256509e9..aa95ebb2 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala @@ -10,6 +10,7 @@ case class AFiRawData(var totalArea: Double) { def merge(other: AFiRawData): AFiRawData = { AFiRawData( totalArea + other.totalArea + ) } } From 34ea97cc2b27d52ffaa3864f6f984879d2fc4ec9 Mon Sep 17 00:00:00 2001 From: Solomon Negusse Date: Wed, 9 Aug 2023 10:59:01 -0500 Subject: [PATCH 12/33] add GADM layers --- .../globalforestwatch/layers/GADMadm0.scala | 270 +++++++++++++++++- .../summarystats/afi/AFiGridSources.scala | 11 +- .../summarystats/afi/AFiRawDataGroup.scala | 4 +- .../summarystats/afi/AFiSummary.scala | 12 +- .../summarystats/afi/AFiTile.scala | 5 +- 5 files changed, 282 insertions(+), 20 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala 
b/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala index 549cfb35..a4e33dab 100644 --- a/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala +++ b/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala @@ -3,16 +3,264 @@ package org.globalforestwatch.layers import org.globalforestwatch.grids.GridTile case class GADMadm0(gridTile: GridTile, kwargs: Map[String, Any]) - extends IntegerLayer - with OptionalILayer { - - val datasetName: String = "gadm_adm0" - val uri: String = - uriForGrid(gridTile, kwargs) + extends StringLayer + with OptionalILayer { + + val datasetName: String = "gadm_adm0" + val uri: String = + uriForGrid(gridTile, kwargs) - override def lookup(value: Int): Integer = - if (value == 9999) null else value - - - } + def lookup(value: Int): String = value match { + case 4 => "AFG" + case 248 => "ALA" + case 8 => "ALB" + case 12 => "DZA" + case 16 => "ASM" + case 20 => "AND" + case 24 => "AGO" + case 660 => "AIA" + case 10 => "ATA" + case 28 => "ATG" + case 32 => "ARG" + case 51 => "ARM" + case 533 => "ABW" + case 36 => "AUS" + case 40 => "AUT" + case 31 => "AZE" + case 44 => "BHS" + case 48 => "BHR" + case 50 => "BGD" + case 52 => "BRB" + case 112 => "BLR" + case 56 => "BEL" + case 84 => "BLZ" + case 204 => "BEN" + case 60 => "BMU" + case 64 => "BTN" + case 68 => "BOL" + case 535 => "BES" + case 70 => "BIH" + case 72 => "BWA" + case 74 => "BVT" + case 76 => "BRA" + case 86 => "IOT" + case 96 => "BRN" + case 100 => "BGR" + case 854 => "BFA" + case 108 => "BDI" + case 132 => "CPV" + case 116 => "KHM" + case 120 => "CMR" + case 124 => "CAN" + case 136 => "CYM" + case 140 => "CAF" + case 148 => "TCD" + case 152 => "CHL" + case 156 => "CHN" + case 162 => "CXR" + case 166 => "CCK" + case 170 => "COL" + case 174 => "COM" + case 178 => "COG" + case 180 => "COD" + case 184 => "COK" + case 188 => "CRI" + case 384 => "CIV" + case 191 => "HRV" + case 192 => "CUB" + case 531 => "CUW" + case 196 => "CYP" + case 203 => "CZE" + case 208 => "DNK" + 
case 262 => "DJI" + case 212 => "DMA" + case 214 => "DOM" + case 218 => "ECU" + case 818 => "EGY" + case 222 => "SLV" + case 226 => "GNQ" + case 232 => "ERI" + case 233 => "EST" + case 748 => "SWZ" + case 231 => "ETH" + case 238 => "FLK" + case 234 => "FRO" + case 242 => "FJI" + case 246 => "FIN" + case 250 => "FRA" + case 254 => "GUF" + case 258 => "PYF" + case 260 => "ATF" + case 266 => "GAB" + case 270 => "GMB" + case 268 => "GEO" + case 276 => "DEU" + case 288 => "GHA" + case 292 => "GIB" + case 300 => "GRC" + case 304 => "GRL" + case 308 => "GRD" + case 312 => "GLP" + case 316 => "GUM" + case 320 => "GTM" + case 831 => "GGY" + case 324 => "GIN" + case 624 => "GNB" + case 328 => "GUY" + case 332 => "HTI" + case 334 => "HMD" + case 336 => "VAT" + case 340 => "HND" + case 344 => "HKG" + case 348 => "HUN" + case 352 => "ISL" + case 356 => "IND" + case 360 => "IDN" + case 364 => "IRN" + case 368 => "IRQ" + case 372 => "IRL" + case 833 => "IMN" + case 376 => "ISR" + case 380 => "ITA" + case 388 => "JAM" + case 392 => "JPN" + case 832 => "JEY" + case 400 => "JOR" + case 398 => "KAZ" + case 404 => "KEN" + case 296 => "KIR" + case 408 => "PRK" + case 410 => "KOR" + case 414 => "KWT" + case 417 => "KGZ" + case 418 => "LAO" + case 428 => "LVA" + case 422 => "LBN" + case 426 => "LSO" + case 430 => "LBR" + case 434 => "LBY" + case 438 => "LIE" + case 440 => "LTU" + case 442 => "LUX" + case 446 => "MAC" + case 450 => "MDG" + case 454 => "MWI" + case 458 => "MYS" + case 462 => "MDV" + case 466 => "MLI" + case 470 => "MLT" + case 584 => "MHL" + case 474 => "MTQ" + case 478 => "MRT" + case 480 => "MUS" + case 175 => "MYT" + case 484 => "MEX" + case 583 => "FSM" + case 498 => "MDA" + case 492 => "MCO" + case 496 => "MNG" + case 499 => "MNE" + case 500 => "MSR" + case 504 => "MAR" + case 508 => "MOZ" + case 104 => "MMR" + case 516 => "NAM" + case 520 => "NRU" + case 524 => "NPL" + case 528 => "NLD" + case 540 => "NCL" + case 554 => "NZL" + case 558 => "NIC" + case 562 => "NER" + 
case 566 => "NGA" + case 570 => "NIU" + case 574 => "NFK" + case 807 => "MKD" + case 580 => "MNP" + case 578 => "NOR" + case 512 => "OMN" + case 586 => "PAK" + case 585 => "PLW" + case 275 => "PSE" + case 591 => "PAN" + case 598 => "PNG" + case 600 => "PRY" + case 604 => "PER" + case 608 => "PHL" + case 612 => "PCN" + case 616 => "POL" + case 620 => "PRT" + case 630 => "PRI" + case 634 => "QAT" + case 638 => "REU" + case 642 => "ROU" + case 643 => "RUS" + case 646 => "RWA" + case 652 => "BLM" + case 654 => "SHN" + case 659 => "KNA" + case 662 => "LCA" + case 663 => "MAF" + case 666 => "SPM" + case 670 => "VCT" + case 882 => "WSM" + case 674 => "SMR" + case 678 => "STP" + case 682 => "SAU" + case 686 => "SEN" + case 688 => "SRB" + case 690 => "SYC" + case 694 => "SLE" + case 702 => "SGP" + case 534 => "SXM" + case 703 => "SVK" + case 705 => "SVN" + case 90 => "SLB" + case 706 => "SOM" + case 710 => "ZAF" + case 239 => "SGS" + case 728 => "SSD" + case 724 => "ESP" + case 144 => "LKA" + case 729 => "SDN" + case 740 => "SUR" + case 744 => "SJM" + case 752 => "SWE" + case 756 => "CHE" + case 760 => "SYR" + case 158 => "TWN" + case 762 => "TJK" + case 834 => "TZA" + case 764 => "THA" + case 626 => "TLS" + case 768 => "TGO" + case 772 => "TKL" + case 776 => "TON" + case 780 => "TTO" + case 788 => "TUN" + case 792 => "TUR" + case 795 => "TKM" + case 796 => "TCA" + case 798 => "TUV" + case 800 => "UGA" + case 804 => "UKR" + case 784 => "ARE" + case 826 => "GBR" + case 840 => "USA" + case 581 => "UMI" + case 858 => "URY" + case 860 => "UZB" + case 548 => "VUT" + case 862 => "VEN" + case 704 => "VNM" + case 92 => "VGB" + case 850 => "VIR" + case 876 => "WLF" + case 732 => "ESH" + case 887 => "YEM" + case 894 => "ZMB" + case 716 => "ZWE" + case _ => "Unknown" + } +} \ No newline at end of file diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala index 
2fb4d5d6..421b934f 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala @@ -13,6 +13,9 @@ case class AFiGridSources(gridTile: GridTile, kwargs: Map[String, Any]) extends val treeCoverLoss: TreeCoverLoss = TreeCoverLoss(gridTile, kwargs) val sbtnNaturalForest: SBTNNaturalForests = SBTNNaturalForests(gridTile, kwargs) val negligibleRisk: NegligibleRisk = NegligibleRisk(gridTile, kwargs) + val gadmAdm0: GADMadm0 = GADMadm0(gridTile, kwargs) + val gadmAdm1: GADMadm1 = GADMadm1(gridTile, kwargs) + val gadmAdm2: GADMadm2 = GADMadm2(gridTile, kwargs) def readWindow( windowKey: SpatialKey, @@ -24,11 +27,17 @@ case class AFiGridSources(gridTile: GridTile, kwargs: Map[String, Any]) extends val sbtnNaturalForestTile = sbtnNaturalForest.fetchWindow(windowKey, windowLayout) val negligibleRiskTile = negligibleRisk.fetchWindow(windowKey, windowLayout) + val adm0Tile = gadmAdm0.fetchWindow(windowKey, windowLayout) + val adm1Tile = gadmAdm1.fetchWindow(windowKey, windowLayout) + val adm2Tile = gadmAdm2.fetchWindow(windowKey, windowLayout) val tile = AFiTile( lossTile, sbtnNaturalForestTile, - negligibleRiskTile + negligibleRiskTile, + adm0Tile, + adm1Tile, + adm2Tile ) Raster(tile, windowKey.extent(windowLayout)) } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala index 0db7587f..17e74206 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala @@ -4,7 +4,9 @@ import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeD import java.time.LocalDate case class AFiRawDataGroup( - treeCoverLossYear: Int + treeCoverLossYear: Int, + gadmId: String + ) { def toAFiData(treeCoverLossArea: Double): AFiData = { 
AFiData(AFiDataLossYearly.fill(treeCoverLossYear, treeCoverLossArea)) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala index 4fc0d922..7bb2b3f6 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala @@ -39,11 +39,11 @@ object AFiSummary { def visit(raster: Raster[AFiTile], col: Int, row: Int): Unit = { val lossYear: Integer = raster.tile.treeCoverLoss.getData(col, row) -// val iso = ... -// val adm1 = ... -// val adm2: Integer = ... -// -// val groupKey = AFiRawDataGroup(iso, adm1, adm2, lossYear) + val gadmAdm0: String = raster.tile.gadmAdm0.getData(col, row) + val gadmAdm1: Integer = raster.tile.gadmAdm1.getData(col, row) + val gadmAdm2: Integer = raster.tile.gadmAdm2.getData(col, row) + val gadmId: String = s"$gadmAdm0.$gadmAdm1.$gadmAdm2" + // pixel Area val lat: Double = raster.rasterExtent.gridRowToMap(row) val area: Double = Geodesy.pixelArea( @@ -52,7 +52,7 @@ object AFiSummary { ) val areaHa = area / 10000.0 - val groupKey = AFiRawDataGroup(lossYear) + val groupKey = AFiRawDataGroup(lossYear, gadmId) val summaryData = acc.stats.getOrElse(groupKey, AFiRawData(treeCoverLossArea = 0)) summaryData.treeCoverLossArea += areaHa diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala index 6230fb5d..3dabef95 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala @@ -11,7 +11,10 @@ import org.globalforestwatch.layers._ case class AFiTile( treeCoverLoss: TreeCoverLoss#ITile, sbtnNaturalForest: SBTNNaturalForests#OptionalITile, - negligibleRisk: NegligibleRisk#OptionalITile + negligibleRisk: NegligibleRisk#OptionalITile, + gadmAdm0: GADMadm0#OptionalITile, + 
gadmAdm1: GADMadm1#OptionalITile, + gadmAdm2: GADMadm2#OptionalITile ) extends CellGrid[Int] { def cellType: CellType = treeCoverLoss.cellType From 0f0ed7583ffc2fae845bf314060e72679c18fada Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Thu, 10 Aug 2023 13:47:28 -0700 Subject: [PATCH 13/33] Use data types --- .../summarystats/afi/AFiAnalysis.scala | 7 +++++++ .../summarystats/afi/AFiData.scala | 21 +++++++------------ .../summarystats/afi/AFiRawDataGroup.scala | 5 ++--- .../summarystats/afi/AFiSummary.scala | 14 +++++++++---- 4 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index e05c8004..59c525ff 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -44,7 +44,14 @@ object AFiAnalysis extends SummaryAnalysis { // TODO somehow convert AFiSummary to AFiData + import spark.implicits._ + val summaryDF = AFiDF.getFeatureDataFrame(dataRDD, spark) + .withColumn( + "negligible_risk_percent", + $"negligible_risk_area" / $"total_area" * 100 + ).drop("negligible_risk_area") + val runOutputUrl: String = getOutputUrl(kwargs) AFiExport.export(featureType, summaryDF, runOutputUrl, kwargs) } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala index 2dd5a9a6..ff82bfe3 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala @@ -1,9 +1,5 @@ package org.globalforestwatch.summarystats.afi -import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataLossYearly -import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataLossYearlyCategory -import 
org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataDoubleCategory - import cats.Semigroup import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -16,15 +12,13 @@ case class AFiData( tree_cover_loss_natural_forest_yearly: AFiDataLossYearly, /** Natural Forest extent within location geometry */ natural_forest_extent: AFiDataDouble, - is_negligible_risk: AFiDataBoolean, - negligible_risk_area: AFiDataDoubleCategory, - total_area: AFiDataDoubleCategory, + negligible_risk_area: AFiDataDouble, + total_area: AFiDataDouble, ) { def merge(other: AFiData): AFiData = { AFiData( tree_cover_loss_natural_forest_yearly.merge(other.tree_cover_loss_natural_forest_yearly), natural_forest_extent.merge(other.natural_forest_extent), - is_negligible_risk.merge(other.is_negligible_risk), negligible_risk_area.merge(other.negligible_risk_area), total_area.merge(other.total_area) ) @@ -37,7 +31,8 @@ object AFiData { AFiData( AFiDataLossYearly.empty, AFiDataDouble.empty, - AFiDataBoolean.empty + AFiDataDouble.empty, + AFiDataDouble.empty, ) implicit val afiDataSemigroup: Semigroup[AFiData] = @@ -46,8 +41,8 @@ object AFiData { x.merge(y) } - implicit def dataExpressionEncoder: ExpressionEncoder[AFiData] = - frameless - .TypedExpressionEncoder[AFiData] - .asInstanceOf[ExpressionEncoder[AFiData]] +// implicit def dataExpressionEncoder: ExpressionEncoder[AFiData] = +// frameless +// .TypedExpressionEncoder[AFiData] +// .asInstanceOf[ExpressionEncoder[AFiData]] } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala index 36a2dcb7..46c7d2ae 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala @@ -14,9 +14,8 @@ case class AFiRawDataGroup( AFiData( AFiDataLossYearly.fill(treeCoverLossYear, totalArea, isNaturalLand), 
AFiDataDouble.fill(totalArea, isNaturalLand), - AFiDataBoolean.fill(negligibleRisk == "YES"), - AFiDataDoubleCategory.fill(gadmId, totalArea, include = negligibleRisk != "NA"), - AFiDataDoubleCategory.fill(gadmId, totalArea) + AFiDataDouble.fill(totalArea, negligibleRisk != "Y"), + AFiDataDouble.fill(totalArea) ) } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala index 4fc0d922..3f81f2ba 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala @@ -22,7 +22,7 @@ case class AFiSummary( def toAFiData(): AFiData = { stats - .map { case (group, data) => group.toAFiData(data.treeCoverLossArea) } + .map { case (group, data) => group.toAFiData(data.totalArea) } .foldLeft(AFiData.empty)( _ merge _) } } @@ -38,6 +38,8 @@ object AFiSummary { def visit(raster: Raster[AFiTile], col: Int, row: Int): Unit = { val lossYear: Integer = raster.tile.treeCoverLoss.getData(col, row) + val naturalLandsCategory: String = raster.tile.sbtnNaturalForest.getData(col, row) + val negligibleRisk: String = raster.tile.negligibleRisk.getData(col, row) // val iso = ... // val adm1 = ... 
@@ -51,10 +53,14 @@ object AFiSummary { raster.cellSize ) val areaHa = area / 10000.0 + val isNaturalLand = naturalLandsCategory =="Natural Forest" - val groupKey = AFiRawDataGroup(lossYear) - val summaryData = acc.stats.getOrElse(groupKey, AFiRawData(treeCoverLossArea = 0)) - summaryData.treeCoverLossArea += areaHa + // TODO implement + val gadmId = "IDN.24.9" + + val groupKey = AFiRawDataGroup(lossYear, gadmId, isNaturalLand, negligibleRisk) + val summaryData = acc.stats.getOrElse(groupKey, AFiRawData(totalArea = 0)) + summaryData.totalArea += areaHa val new_stats = acc.stats.updated(groupKey, summaryData) acc = AFiSummary(new_stats) From de83f917074056c7a1d9d89d3e788148de8bcbe6 Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Thu, 10 Aug 2023 15:24:22 -0700 Subject: [PATCH 14/33] Don't use RDD --- src/main/resources/raster-catalog-pro.json | 2 +- .../summarystats/afi/AFiAnalysis.scala | 14 +-- .../summarystats/afi/AFiDF.scala | 17 ++-- .../summarystats/afi/AFiData.scala | 36 +++----- .../summarystats/afi/AFiDataBoolean.scala | 30 ------ .../summarystats/afi/AFiDataDouble.scala | 34 ------- .../afi/AFiDataDoubleCategory.scala | 58 ------------ .../summarystats/afi/AFiDataGroup.scala | 10 ++ .../summarystats/afi/AFiDataLossYearly.scala | 91 ------------------- .../afi/AFiDataLossYearlyCategory.scala | 74 --------------- .../summarystats/afi/AFiRawData.scala | 19 ---- .../summarystats/afi/AFiRawDataGroup.scala | 21 ----- .../summarystats/afi/AFiSummary.scala | 32 ++++--- 13 files changed, 59 insertions(+), 379 deletions(-) delete mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataBoolean.scala delete mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDouble.scala delete mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDoubleCategory.scala create mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataGroup.scala delete mode 100644 
src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearly.scala delete mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearlyCategory.scala delete mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala delete mode 100644 src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala diff --git a/src/main/resources/raster-catalog-pro.json b/src/main/resources/raster-catalog-pro.json index 4fe9dd45..1a35eff4 100644 --- a/src/main/resources/raster-catalog-pro.json +++ b/src/main/resources/raster-catalog-pro.json @@ -314,7 +314,7 @@ }, { "name": "gfwpro_negligible_risk_analysis", - "source_uri": "s3://gfw-data-lake/gfwpro_negligible_risk_analysis/v20230726/raster/epsg-4326/{grid_size}/{row_count}/negligible_risk/geotiff/{tile_id}.tif" + "source_uri": "s3://gfw-data-lake/gfwpro_negligible_risk_analysis/v20230726/raster/epsg-4326/{grid_size}/{row_count}/risk/geotiff/{tile_id}.tif" } ] } \ No newline at end of file diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index 59c525ff..ca0ced5e 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -35,18 +35,18 @@ object AFiAnalysis extends SummaryAnalysis { } val summaryRDD: RDD[ValidatedLocation[AFiSummary]] = AFiRDD(validatedRDD, AFiGrid.blockTileGrid, kwargs) - val dataRDD: RDD[ValidatedLocation[AFiData]] = ValidatedWorkflow(summaryRDD).mapValid { summaries => - summaries - .mapValues { - case summary: AFiSummary => summary.toAFiData() - } - }.unify +// val dataRDD: RDD[ValidatedLocation[AFiData]] = ValidatedWorkflow(summaryRDD).mapValid { summaries => +// summaries +// .mapValues { +// case summary: AFiSummary => summary.toAFiData() +// } +// }.unify // TODO somehow convert AFiSummary to AFiData import spark.implicits._ - 
val summaryDF = AFiDF.getFeatureDataFrame(dataRDD, spark) + val summaryDF = AFiDF.getFeatureDataFrame(summaryRDD, spark) .withColumn( "negligible_risk_percent", $"negligible_risk_area" / $"total_area" * 100 diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala index bd887560..a1b49ac0 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala @@ -12,7 +12,7 @@ object AFiDF extends SummaryDF { case class RowGadmId(list_id: String, location_id: String, gadm_id: String) def getFeatureDataFrame( - dataRDD: RDD[ValidatedLocation[AFiData]], + summaryRDD: RDD[ValidatedLocation[AFiSummary]], spark: SparkSession ): DataFrame = { import spark.implicits._ @@ -28,14 +28,17 @@ object AFiDF extends SummaryDF { throw new IllegalArgumentException(s"Can't produce DataFrame for $id") } - dataRDD - .map { + summaryRDD + .flatMap { case Valid(Location(fid, data)) => - (rowId(fid), RowError.empty, data) + data.stats.map { + case (dataGroup, data) => + (rowId(fid), RowError.empty, dataGroup, data) + } case Invalid(Location(fid, err)) => - (rowId(fid), RowError.fromJobError(err), AFiData.empty) + List((rowId(fid), RowError.fromJobError(err), AFiDataGroup.empty, AFiData.empty)) } - .toDF("id", "error", "data") - .select($"id.*", $"error.*", $"data.*") + .toDF("id", "error", "dataGroup", "data") + .select($"id.*", $"error.*", $"dataGroup.*", $"data.*") } } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala index ff82bfe3..89d131b2 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala @@ -1,48 +1,34 @@ package org.globalforestwatch.summarystats.afi import cats.Semigroup -import 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder /** Summary data per class * * Note: This case class contains mutable values */ case class AFiData( - /** Annual Tree Cover Loss on Natural Forest pixels within location geometry */ - tree_cover_loss_natural_forest_yearly: AFiDataLossYearly, - /** Natural Forest extent within location geometry */ - natural_forest_extent: AFiDataDouble, - negligible_risk_area: AFiDataDouble, - total_area: AFiDataDouble, -) { + var natural_land_extent: Double, + var tree_cover_loss_area: Double, + var negligible_risk_area: Double, + var total_area: Double + ) { def merge(other: AFiData): AFiData = { AFiData( - tree_cover_loss_natural_forest_yearly.merge(other.tree_cover_loss_natural_forest_yearly), - natural_forest_extent.merge(other.natural_forest_extent), - negligible_risk_area.merge(other.negligible_risk_area), - total_area.merge(other.total_area) + natural_land_extent + other.natural_land_extent, + tree_cover_loss_area + other.tree_cover_loss_area, + negligible_risk_area + other.negligible_risk_area, + total_area + other.total_area ) } } object AFiData { - def empty: AFiData = - AFiData( - AFiDataLossYearly.empty, - AFiDataDouble.empty, - AFiDataDouble.empty, - AFiDataDouble.empty, - ) + AFiData(0, 0, 0, 0) implicit val afiDataSemigroup: Semigroup[AFiData] = new Semigroup[AFiData] { - def combine(x: AFiData, y: AFiData): AFiData = - x.merge(y) + def combine(x: AFiData, y: AFiData): AFiData = x.merge(y) } -// implicit def dataExpressionEncoder: ExpressionEncoder[AFiData] = -// frameless -// .TypedExpressionEncoder[AFiData] -// .asInstanceOf[ExpressionEncoder[AFiData]] } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataBoolean.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataBoolean.scala deleted file mode 100644 index 57aa5d78..00000000 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataBoolean.scala +++ /dev/null @@ -1,30 +0,0 @@ -package 
org.globalforestwatch.summarystats.afi - -import frameless.Injection -import io.circe.syntax._ - -case class AFiDataBoolean(value: Boolean) - extends AFiDataParser[AFiDataBoolean] { - def merge( - other: AFiDataBoolean - ): AFiDataBoolean = { - AFiDataBoolean((value || other.value)) - } - - def toJson: String = { - this.value.asJson.noSpaces - } -} - -object AFiDataBoolean { - def empty: AFiDataBoolean = - AFiDataBoolean(false) - - def fill(value: Boolean): AFiDataBoolean = { - AFiDataBoolean(value) - } - - implicit def injection: Injection[AFiDataBoolean, String] = - Injection(_.toJson, s => AFiDataBoolean(s.toBoolean)) -} - diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDouble.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDouble.scala deleted file mode 100644 index 7e37ca62..00000000 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDouble.scala +++ /dev/null @@ -1,34 +0,0 @@ -package org.globalforestwatch.summarystats.afi -import frameless.Injection -import org.globalforestwatch.util.Implicits._ -import io.circe.syntax._ - -case class AFiDataDouble(value: Double) extends AFiDataParser[AFiDataDouble] { - def merge( - other: AFiDataDouble - ): AFiDataDouble = { - AFiDataDouble(value + other.value) - } - - def round: Double = this.round(value) - - def toJson: String = { - this.round.asJson.noSpaces - } -} - -object AFiDataDouble { - def empty: AFiDataDouble = - AFiDataDouble(0) - - def fill(value: Double, - include: Boolean = true): AFiDataDouble = { - AFiDataDouble(value * include) - } - - implicit def injection: Injection[AFiDataDouble, String] = - Injection(_.toJson, s => AFiDataDouble(s.toDouble)) - -} - - diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDoubleCategory.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDoubleCategory.scala deleted file mode 100644 index 0b14b44f..00000000 --- 
a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataDoubleCategory.scala +++ /dev/null @@ -1,58 +0,0 @@ -package org.globalforestwatch.summarystats.afi - -import frameless.Injection -import io.circe.syntax._ -import io.circe.parser.decode - -case class AFiDataDoubleCategory(value: Map[String, AFiDataDouble]) extends AFiDataParser[AFiDataDoubleCategory] { - def merge( - other: AFiDataDoubleCategory - ): AFiDataDoubleCategory = { - - AFiDataDoubleCategory(value ++ other.value.map { - case (key, otherValue) => - key -> value - .getOrElse(key, AFiDataDouble.empty) - .merge(otherValue) - }) - } - - def toJson: String = { - this.value - .map { - case (key, value) => - key -> value.round - } - .asJson - .noSpaces - } -} - -object AFiDataDoubleCategory { - def empty: AFiDataDoubleCategory = - AFiDataDoubleCategory(Map()) - - def fill( - className: String, - areaHa: Double, - noData: List[String] = List("", "Unknown", "Not applicable"), - include: Boolean = true - ): AFiDataDoubleCategory = { - if (noData.contains(className)) - AFiDataDoubleCategory.empty - else - AFiDataDoubleCategory( - Map(className -> AFiDataDouble.fill(areaHa, include)) - ) - } - - def fromString(value: String): AFiDataDoubleCategory = { - - val categories: Map[String, String] = decode[Map[String, String]](value).getOrElse(Map()) - val newValue: Map[String, AFiDataDouble] = categories.map { case (k, v) => (k, AFiDataDouble(v.toDouble)) } - AFiDataDoubleCategory(newValue) - - } - - implicit def injection: Injection[AFiDataDoubleCategory , String] = Injection(_.toJson, fromString) -} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataGroup.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataGroup.scala new file mode 100644 index 00000000..1cada84d --- /dev/null +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataGroup.scala @@ -0,0 +1,10 @@ +package org.globalforestwatch.summarystats.afi + +case class AFiDataGroup( + gadm_id: String +) + 
+object AFiDataGroup { + def empty: AFiDataGroup = + AFiDataGroup("") +} \ No newline at end of file diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearly.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearly.scala deleted file mode 100644 index de5164ff..00000000 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearly.scala +++ /dev/null @@ -1,91 +0,0 @@ -package org.globalforestwatch.summarystats.afi - -import cats.implicits._ -import cats.kernel.Semigroup -import frameless.Injection -import scala.collection.immutable.SortedMap -import io.circe.syntax._ -import io.circe.parser.decode - -import scala.collection.immutable.SortedMap - -case class AFiDataLossYearly(value: SortedMap[Int, Double]) - extends AFiDataParser[AFiDataLossYearly] { - - def merge(other: AFiDataLossYearly): AFiDataLossYearly = { - AFiDataLossYearly(Semigroup[SortedMap[Int, Double]].combine(value, other.value)) - } - - def round: SortedMap[Int, Double] = this.value.map { case (key, value) => key -> this.round(value) } - - def limitToMaxYear(maxYear: Int): AFiDataLossYearly = { - AFiDataLossYearly(value.filterKeys{ year => year <= maxYear }) - } - - def toJson: String = { - this.round.asJson.noSpaces - } -} - -object AFiDataLossYearly { - def empty: AFiDataLossYearly = - AFiDataLossYearly( - SortedMap() - ) - - def prefilled: AFiDataLossYearly = - AFiDataLossYearly( - SortedMap( - 2001 -> 0, - 2002 -> 0, - 2003 -> 0, - 2004 -> 0, - 2005 -> 0, - 2006 -> 0, - 2007 -> 0, - 2008 -> 0, - 2009 -> 0, - 2010 -> 0, - 2011 -> 0, - 2012 -> 0, - 2013 -> 0, - 2014 -> 0, - 2015 -> 0, - 2016 -> 0, - 2017 -> 0, - 2018 -> 0, - 2019 -> 0, - 2020 -> 0, - 2021 -> 0, - ) - ) - - def fill(lossYear: Int, - areaHa: Double, - include: Boolean = true): AFiDataLossYearly = { - - // Only except lossYear values within range of default map - val minLossYear: Int = this.prefilled.value.keysIterator.min - val maxLossYear: Int = 
this.prefilled.value.keysIterator.max - - if (minLossYear <= lossYear && lossYear <= maxLossYear && include) { - AFiDataLossYearly.prefilled.merge( - AFiDataLossYearly( - SortedMap( - lossYear -> areaHa - ) - ) - ) - } else - this.empty - } - - def fromString(value: String): AFiDataLossYearly = { - val sortedMap = decode[SortedMap[Int, Double]](value) - AFiDataLossYearly(sortedMap.getOrElse(SortedMap())) - } - - implicit def injection: Injection[AFiDataLossYearly, String] = Injection(_.toJson, fromString) -} - - diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearlyCategory.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearlyCategory.scala deleted file mode 100644 index 345b2ce0..00000000 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDataLossYearlyCategory.scala +++ /dev/null @@ -1,74 +0,0 @@ -package org.globalforestwatch.summarystats.afi - -import frameless.Injection -import io.circe.syntax._ -import io.circe.parser.decode - - -case class AFiDataLossYearlyCategory( - value: Map[String, AFiDataLossYearly] - ) extends AFiDataParser[ - AFiDataLossYearlyCategory -] { - def merge( - other: AFiDataLossYearlyCategory - ): AFiDataLossYearlyCategory = { - - AFiDataLossYearlyCategory(value ++ other.value.map { - case (key, otherValue) => - key -> value - .getOrElse(key, AFiDataLossYearly.empty) - .merge(otherValue) - }) - } - - def toJson: String = { - this.value - .map { - case (key, value) => - key -> value.round - } - .asJson - .noSpaces - } -} - -object AFiDataLossYearlyCategory { - def empty: AFiDataLossYearlyCategory = - AFiDataLossYearlyCategory(Map()) - - def fill( - className: String, - lossYear: Int, - areaHa: Double, - noData: List[String] = List("", "Unknown", "Not applicable"), - include: Boolean = true - ): AFiDataLossYearlyCategory = { - - if (noData.contains(className)) - AFiDataLossYearlyCategory.empty - else - AFiDataLossYearlyCategory( - Map( - className -> AFiDataLossYearly - 
.fill(lossYear, areaHa, include) - ) - ) - } - - def fromString( - value: String - ): AFiDataLossYearlyCategory = { - - val categories: Map[String, String] = - decode[Map[String, String]](value).getOrElse(Map()) - val newValues = categories.map { - case (k, v) => (k, AFiDataLossYearly.fromString(v)) - } - - AFiDataLossYearlyCategory(newValues) - - } - - implicit def injection: Injection[AFiDataLossYearlyCategory, String] = Injection(_.toJson, fromString) -} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala deleted file mode 100644 index aa95ebb2..00000000 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawData.scala +++ /dev/null @@ -1,19 +0,0 @@ -package org.globalforestwatch.summarystats.afi - -import cats.Semigroup - -/** Summary data per class - * - * Note: This case class contains mutable values - */ -case class AFiRawData(var totalArea: Double) { - def merge(other: AFiRawData): AFiRawData = { - AFiRawData( - totalArea + other.totalArea - ) - } -} - -object AFiRawData { - implicit val lossDataSemigroup: Semigroup[AFiRawData] = Semigroup.instance(_ merge _) -} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala deleted file mode 100644 index 46c7d2ae..00000000 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiRawDataGroup.scala +++ /dev/null @@ -1,21 +0,0 @@ -package org.globalforestwatch.summarystats.afi - -import org.globalforestwatch.summarystats.forest_change_diagnostic.ForestChangeDiagnosticDataDouble - -import java.time.LocalDate - -case class AFiRawDataGroup( - treeCoverLossYear: Int, - gadmId: String, - isNaturalLand: Boolean, - negligibleRisk: String, -) { - def toAFiData(totalArea: Double): AFiData = { - AFiData( - AFiDataLossYearly.fill(treeCoverLossYear, totalArea, isNaturalLand), - 
AFiDataDouble.fill(totalArea, isNaturalLand), - AFiDataDouble.fill(totalArea, negligibleRisk != "Y"), - AFiDataDouble.fill(totalArea) - ) - } -} diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala index 3f81f2ba..80f47ac1 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala @@ -4,13 +4,14 @@ import cats.implicits._ import geotrellis.raster._ import geotrellis.raster.Raster import geotrellis.raster.summary.GridVisitor -import org.globalforestwatch.summarystats.Summary +import org.globalforestwatch.summarystats.{Summary, summarySemigroup} import org.globalforestwatch.util.Geodesy + import java.time.LocalDate /** LossData Summary by year */ case class AFiSummary( - stats: Map[AFiRawDataGroup, AFiRawData] = Map.empty + stats: Map[AFiDataGroup, AFiData] = Map.empty ) extends Summary[AFiSummary] { /** Combine two Maps and combine their LossData when a year is present in both */ @@ -18,13 +19,8 @@ case class AFiSummary( // the years.combine method uses LossData.lossDataSemigroup instance to perform per value combine on the map AFiSummary(stats.combine(other.stats)) } - def isEmpty = stats.isEmpty - def toAFiData(): AFiData = { - stats - .map { case (group, data) => group.toAFiData(data.totalArea) } - .foldLeft(AFiData.empty)( _ merge _) - } + def isEmpty = stats.isEmpty } object AFiSummary { @@ -45,7 +41,7 @@ object AFiSummary { // val adm1 = ... // val adm2: Integer = ... 
// -// val groupKey = AFiRawDataGroup(iso, adm1, adm2, lossYear) + // pixel Area val lat: Double = raster.rasterExtent.gridRowToMap(row) val area: Double = Geodesy.pixelArea( @@ -58,9 +54,21 @@ object AFiSummary { // TODO implement val gadmId = "IDN.24.9" - val groupKey = AFiRawDataGroup(lossYear, gadmId, isNaturalLand, negligibleRisk) - val summaryData = acc.stats.getOrElse(groupKey, AFiRawData(totalArea = 0)) - summaryData.totalArea += areaHa + val groupKey = AFiDataGroup(gadmId) + val summaryData = acc.stats.getOrElse(groupKey, AFiData(0, 0, 0, 0)) + summaryData.total_area += areaHa + + if (lossYear >= 2021) { + summaryData.tree_cover_loss_area += areaHa + } + + if (negligibleRisk == "NO") { + summaryData.negligible_risk_area += areaHa + } + + if (naturalLandsCategory == "Natural Forest") { + summaryData.natural_land_extent += areaHa + } val new_stats = acc.stats.updated(groupKey, summaryData) acc = AFiSummary(new_stats) From c9044a709bf49b8eb71e1f1b547cb7b4665f95f0 Mon Sep 17 00:00:00 2001 From: Solomon Negusse Date: Tue, 15 Aug 2023 09:34:30 -0500 Subject: [PATCH 15/33] remove gadm value for location rows --- .../org/globalforestwatch/summarystats/afi/AFiAnalysis.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index ca0ced5e..53dd7621 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -46,7 +46,10 @@ object AFiAnalysis extends SummaryAnalysis { // TODO somehow convert AFiSummary to AFiData import spark.implicits._ - val summaryDF = AFiDF.getFeatureDataFrame(summaryRDD, spark) + val summaryDF = AFiDF + .getFeatureDataFrame(summaryRDD, spark) + .withColumn("gadm_id", when(col("location_id") =!= -1, lit("")).otherwise(col("gadm_id"))) + .withColumn( "negligible_risk_percent", 
$"negligible_risk_area" / $"total_area" * 100 From 7cd4b8eac9aa578febeb4007e8e5c174ab8a69b6 Mon Sep 17 00:00:00 2001 From: Solomon Negusse Date: Tue, 15 Aug 2023 09:35:33 -0500 Subject: [PATCH 16/33] aggregated gadm adm2 partitions of dissolved area --- .../summarystats/afi/AFiAnalysis.scala | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index 53dd7621..1704f3c6 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -1,5 +1,5 @@ package org.globalforestwatch.summarystats.afi - +import org.apache.spark.sql.functions.{col, lit, when, sum, max} import cats.data.Validated.{Invalid, Valid} import cats.data.{NonEmptyList, Validated} import geotrellis.vector.{Feature, Geometry} @@ -15,7 +15,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession import org.apache.spark.storage.StorageLevel - object AFiAnalysis extends SummaryAnalysis { val name = "afi" @@ -29,8 +28,8 @@ object AFiAnalysis extends SummaryAnalysis { featureRDD.persist(StorageLevel.MEMORY_AND_DISK) // TODO invalid should map to job error somehow, probably using ValidatedWorkflow - val validatedRDD = featureRDD.map{ - case Validated.Valid(Location(id, geom: Geometry)) => Feature(geom, id) + val validatedRDD = featureRDD.map { + case Validated.Valid(Location(id, geom: Geometry)) => Feature(geom, id) case Validated.Invalid(Location(id, geom: Geometry)) => Feature(geom, id) } @@ -42,7 +41,6 @@ object AFiAnalysis extends SummaryAnalysis { // } // }.unify - // TODO somehow convert AFiSummary to AFiData import spark.implicits._ @@ -50,12 +48,29 @@ object AFiAnalysis extends SummaryAnalysis { .getFeatureDataFrame(summaryRDD, spark) .withColumn("gadm_id", when(col("location_id") =!= -1, 
lit("")).otherwise(col("gadm_id"))) + val gadmAgg = summaryDF + .filter($"location_id" === -1) + .groupBy($"list_id") + .agg( + sum("natural_land_extent").alias("natural_land_extent"), + sum("tree_cover_loss_area").alias("tree_cover_loss_area"), + sum("negligible_risk_area").alias("negligible_risk_area"), + sum("total_area").alias("total_area"), + max("status_code").alias("status_code") + ) + .withColumn("gadm_id", lit("")) + .withColumn("location_error", lit("")) + .withColumn("location_id", lit(-1)) + + val combinedDF = summaryDF.unionByName(gadmAgg) + val resultsDF = combinedDF .withColumn( "negligible_risk_percent", $"negligible_risk_area" / $"total_area" * 100 - ).drop("negligible_risk_area") + ) + .drop("negligible_risk_area") val runOutputUrl: String = getOutputUrl(kwargs) - AFiExport.export(featureType, summaryDF, runOutputUrl, kwargs) + AFiExport.export(featureType, resultsDF, runOutputUrl, kwargs) } } From ae71630b668fd0f3c28ae05643e78d6f05d8aba2 Mon Sep 17 00:00:00 2001 From: Dan Scales Date: Wed, 16 Aug 2023 12:02:08 -0700 Subject: [PATCH 17/33] Update README.md with new subcommands (analyses) and cmd-line options Also updated with the fact that the subcommand (the analysis) is now always provided as the first argument (and without the --analysis option) Fixed a help message for the integrated alerts subcommand. 
--- README.md | 112 ++++++++++++------ .../IntegratedAlertsCommand.scala | 2 +- 2 files changed, 77 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 965962f9..f03fd40c 100644 --- a/README.md +++ b/README.md @@ -9,13 +9,14 @@ This project performs a polygonal summary on tree cover loss and intersecting la Currently the following analysis are implemented * Tree Cover Loss (for ArcPy client) -* Annual Update * Annual Update minimal * Carbon Flux Full Standard Model * Carbon Flux Sensitivity Analysis * Glad Alerts -* Viirs Fire Alerts -* MODIS Fire Alerts +* Viirs/ MODIS Fire Alerts +* Forest Change Diagnostic +* GFW Pro Dashboard +* Integrated Alerts ### Tree Cover Loss @@ -35,18 +36,9 @@ This type of analysis only supports simple features as input. Best used together sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain treecoverloss --feature_type feature --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix --tcd 2000 --threshold 0 --threshold 30 --contextual layer is__gfw_plantations --carbon_pools ``` -### Annual Update - -A complex analysis intersecting Tree Cover Loss data with more than 40 layers. This analysis is used for the GFU. Supported input features are GADM features only. -Output are Summary and Change tables for ISO, ADM1 and ADM2 areas. - -```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis annualupdate --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix -``` - ### Annual Update minimal -This analysis follows the same methodology as the annual update analysis above, just with fewer intersecting layers. +An analysis intersecting Tree Cover Loss data with a number of layers. It is used to compute statistics for the GFW country and user dashboards, including carbon flux outputs (emissions, removals, net flux). 
Supported input features are @@ -61,10 +53,10 @@ For GADM there will also be summary tables with one row per ISO, ADM1, ADM2 and To produce final spreadsheets you will need to add another [post processing step](https://github.com/wri/write_country_stats). ```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis annualupdate_minimal --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis annualupdate_minimal --feature_type wdpa --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis annualupdate_minimal --feature_type geostore --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis annualupdate_minimal --feature_type feature --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --feature_type wdpa --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --feature_type geostore --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --feature_type feature --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix ``` ### Carbon Flux Full Standard Model @@ -75,7 +67,7 @@ It also analyzes several contextual layers that are unique to the carbon flux mo It currently only works with GADM features. 
```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis carbonflux --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain carbonflux --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix ``` ### Carbon Flux Sensitivity Analysis @@ -87,8 +79,8 @@ To run this model with the standard flux model output for an analysis using fewe It currently only works with GADM features. ```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis carbon_sensitivity --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix --sensitivity_type sensitivity_analysis_type -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis carbon_sensitivity --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix --sensitivity_type standard +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain carbon_sensitivity --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix --sensitivity_type sensitivity_analysis_type +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain carbon_sensitivity --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix --sensitivity_type standard ``` ### Glad Alerts @@ -107,10 +99,10 @@ Supported input features are * Simple Feature ```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis gladalerts --feature_type gadm --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis gladalerts --feature_type wdpa --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis gladalerts 
--feature_type geostore --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis gladalerts --feature_type feature --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain gladalerts --feature_type gadm --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain gladalerts --feature_type wdpa --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain gladalerts --feature_type geostore --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain gladalerts --feature_type feature --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] ``` ### Viirs/ MODIS Fire Alerts @@ -127,11 +119,50 @@ Supported input features are * Simple Feature ```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type gadm --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type wdpa --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type geostore --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] 
-sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --analysis firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type feature --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type gadm --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type wdpa --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type geostore --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type feature --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +``` +### Forest Change Diagnostic + +Forest Change Diagnostic computes forest loss and fire alerts for an input set of +lists of geometries. It is used to compute statistics for lists of a GFW Pro +customer. It requires the `firealerts` options and optionally takes any `feature` +options. The only supported input feature is `gfwpro`. Automatically turns on the +`--split_features` option. 
+ +```sbt +sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain forest_change_diagnostic --feature_type gfwpro --features s3://bucket/prefix/file.tsv --fire_alert_source s3://bucket/prefix/file.tsv --output s3://bucket/prefix ``` +### GFW Pro Dashboard + +GFW Pro Dashboard computes summary statistics for the GFW Pro Dashboard. It +optionally takes `firealert` options and `feature` options. + +Supported input features are: + +* GADM +* Geostore +* WDPA +* Simple Feature +* GFW Pro + + +### Integrated Alerts + +Integrated Alerts computes a combination of deforestation alerts based on GLAD-L, +GLAD-S2, and RADD systems. It is used to compute these alerts for GFW. It optionally +takes any `feature`, `gadm`, or `wdpa` options. + +Supported input features are + +* GADM +* Geostore +* WDPA +* Simple Feature +* GFW Pro + ## Inputs @@ -147,6 +178,7 @@ Larger features should be split into smaller features, prior to running the anal Also make sure, that features do not overlap with tile boundaries (we use 10x10 degree tiles). For best performance, intersect input features with a 1x1 degree grid. If you are not sure how to best approach this, simply use the [ArcPY Client](https://github.com/wri/gfw_forest_loss_geotrellis_arcpy_client) +Alternatively, use the `--split_features` option. 
## Options @@ -154,10 +186,9 @@ The following options are supported: |Option |Type |Analysis or Feature Type |Description | |-----------------|------|-------------------------|---------------------------------------------------------------------------------------------------------------------------------| -|analysis |string| |Type of analysis to run `annualupdate`, `annualupdate_minimal`, `carbonflux`, `carbon_sensitivity`, `gladalerts`, `treecoverloss`| |features |string|all (required) |URI of features in TSV format | |output |string|all (required) |URI of output dir for CSV files | -|feature_type |string|all (required) |Feature type: one of 'gadm', 'wdpa', 'geostore' or 'feature | +|feature_type |string|all (required) |Feature type: one of 'gadm', 'wdpa', 'geostore', 'feature', or 'gfwpro' | |limit |int |all |Limit number of records processed | |iso_first |string|`gadm` or `wdpa` features|Filter by first letter of ISO code | |iso_start |string|`gadm` or `wdpa` features|Filter by ISO code larger than or equal to given value | @@ -165,8 +196,10 @@ The following options are supported: |iso |string|`gadm` or `wdpa` features|Filter by country ISO code | |admin1 |string|`gadm` features |Filter by country Admin1 code | |admin2 |string|`gadm` features |Filter by country Admin2 code | -|id_start |int |`feature` analysis |Filter by IDs larger than or equal to given value | +|id_start |int |`feature` analysis |Filter by IDs greater than or equal to given value | +|id_end |int |`feature` analysis |Filter by IDs less than or equal to given value | |wdpa_status |string|`wdpa` features |Filter by WDPA Status | +|iucn_cat |string|`wdpa` features |Filter by IUCN Category | |tcd |int |`treecoverloss` analysis |Select tree cover density year | |threshold |int |`treecoverloss` analysis |Treecover threshold to apply (multiple) | |contextual_layer |string|`treecoverloss` analysis |Include (multiple) selected contextual layers: `is__umd_regional_primary_forest_2001`, 
`is__gfw_plantations` | @@ -175,8 +208,15 @@ The following options are supported: |glad |flag |all |Filter input feature by GLAD tile extent, requires boolean `glad` field in input feature class | |change_only |flag |all except `treecover` |Process change only | |sensitivity_type |string|`carbon_sensitivity` |Select carbon sensitivity model | -|fire_alert_type |string|`firealerts` |Select Fire alert type | -|fire_alert_source|string|`firealerts` |URI of fire alert TSV file | +|fire_alert_type |string|`firealerts` |Select Fire alert type | +|fire_alert_source|string|`firealerts` |URI of fire alert TSV file | +|overwrite |flag |all |Overwrite output location if already existing | +|split_features |flag |all |Split input features along 1x1 degree grid | +|no_output_path_suffix|flag |all |Do not autogenerate output path suffix at runtime | +|intermediate_list_source|flag |`forest_change_diagnostic` analysis |URI of intermediate list results in TSV format | +|contextual_feature_type|flag |`gfwpro_dashboard` analysis |URI of intermediate list results in TSV format | +|contextual_feature_url|flag |`gfwpro_dashboard` analysis |Type of contextual features | + ## Inventory @@ -193,7 +233,7 @@ The following options are supported: For local testing input should be limited with `--limit` flag to minimize the time. 
```sbt -sbt:geotrellis-wri> test:runMain org.globalforestwatch.summarystats.SummaryMain --features file:/Users/input/ten-by-ten-gadm36/wdpa__10N_010E.tsv --output file:/User/out/summary --limit 10 +sbt:geotrellis-wri> test:runMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --features file:/Users/input/ten-by-ten-gadm36/wdpa__10N_010E.tsv --output file:/User/out/summary --limit 10 ``` ### EMR @@ -202,6 +242,6 @@ Before running review `sbtlighter` configuration in `build.sbt`, `reload` SBT se ```sbt sbt:geotrellis-wri> sparkCreateCluster -sbt:treecoverloss> sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --features s3://gfw-files/2018_update/tsv/gadm36_1_1.csv --output s3://gfw-files/2018_update/results/summary --feature_type gadm --analysis annualupdate_minimal --tcl -sbt:treecoverloss> sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain --features s3://gfw-files/2018_update/tsv/wdpa__*.tsv --output s3://gfw-files/2018_update/results/summary --feature_type wdpa --analysis gladalerts --tcl --iso BRA +sbt:treecoverloss> sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --features s3://gfw-files/2018_update/tsv/gadm36_1_1.csv --output s3://gfw-files/2018_update/results/summary --feature_type gadm --tcl +sbt:treecoverloss> sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain gladalerts --features s3://gfw-files/2018_update/tsv/wdpa__*.tsv --output s3://gfw-files/2018_update/results/summary --feature_type wdpa --tcl --iso BRA ``` diff --git a/src/main/scala/org/globalforestwatch/summarystats/integrated_alerts/IntegratedAlertsCommand.scala b/src/main/scala/org/globalforestwatch/summarystats/integrated_alerts/IntegratedAlertsCommand.scala index a15dff15..979c050e 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/integrated_alerts/IntegratedAlertsCommand.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/integrated_alerts/IntegratedAlertsCommand.scala @@ 
-15,7 +15,7 @@ object IntegratedAlertsCommand extends SummaryCommand { val integratedAlertsCommand: Opts[Unit] = Opts.subcommand( name = IntegratedAlertsAnalysis.name, - help = "Compute GLAD summary statistics for GFW dashboards." + help = "Compute Integrated Alerts summary statistics for GFW dashboards." ) { ( defaultOptions, From a95e44f3be89399b41ae7a9a4c90d0e16a8b44ec Mon Sep 17 00:00:00 2001 From: manukala6 Date: Thu, 17 Aug 2023 14:43:20 -0700 Subject: [PATCH 18/33] GTC-2437 Add natural forest loss area --- .../summarystats/afi/AFiData.scala | 8 +++++--- .../summarystats/afi/AFiSummary.scala | 14 +++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala index 89d131b2..7a524b74 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala @@ -7,14 +7,16 @@ import cats.Semigroup * Note: This case class contains mutable values */ case class AFiData( - var natural_land_extent: Double, + var natural_forest_extent: Double, + var natural_forest_loss_area: Double, var tree_cover_loss_area: Double, var negligible_risk_area: Double, var total_area: Double ) { def merge(other: AFiData): AFiData = { AFiData( - natural_land_extent + other.natural_land_extent, + natural_forest_extent + other.natural_forest_extent, + natural_forest_loss_area + other.natural_forest_loss_area, tree_cover_loss_area + other.tree_cover_loss_area, negligible_risk_area + other.negligible_risk_area, total_area + other.total_area @@ -24,7 +26,7 @@ case class AFiData( object AFiData { def empty: AFiData = - AFiData(0, 0, 0, 0) + AFiData(0, 0, 0, 0, 0) implicit val afiDataSemigroup: Semigroup[AFiData] = new Semigroup[AFiData] { diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala 
b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala index 80f47ac1..0bb14f51 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala @@ -34,7 +34,7 @@ object AFiSummary { def visit(raster: Raster[AFiTile], col: Int, row: Int): Unit = { val lossYear: Integer = raster.tile.treeCoverLoss.getData(col, row) - val naturalLandsCategory: String = raster.tile.sbtnNaturalForest.getData(col, row) + val naturalForestCategory: String = raster.tile.sbtnNaturalForest.getData(col, row) val negligibleRisk: String = raster.tile.negligibleRisk.getData(col, row) // val iso = ... @@ -49,13 +49,13 @@ object AFiSummary { raster.cellSize ) val areaHa = area / 10000.0 - val isNaturalLand = naturalLandsCategory =="Natural Forest" + val isNaturalForest = naturalForestCategory == "Natural Forest" // TODO implement val gadmId = "IDN.24.9" val groupKey = AFiDataGroup(gadmId) - val summaryData = acc.stats.getOrElse(groupKey, AFiData(0, 0, 0, 0)) + val summaryData = acc.stats.getOrElse(groupKey, AFiData(0, 0, 0, 0, 0)) summaryData.total_area += areaHa if (lossYear >= 2021) { @@ -66,8 +66,12 @@ object AFiSummary { summaryData.negligible_risk_area += areaHa } - if (naturalLandsCategory == "Natural Forest") { - summaryData.natural_land_extent += areaHa + if (naturalForestCategory == "Natural Forest") { + summaryData.natural_forest_extent += areaHa + } + + if (lossYear >= 2021 && naturalForestCategory == "Natural Forest") { + summaryData.natural_forest_loss_area += areaHa } val new_stats = acc.stats.updated(groupKey, summaryData) From 46edc7170ccee9b2f171f83952ba7b20da999a74 Mon Sep 17 00:00:00 2001 From: Solomon Negusse Date: Fri, 18 Aug 2023 17:09:56 -0500 Subject: [PATCH 19/33] have gadm layers conform to repo's variable naming pattern --- src/main/scala/org/globalforestwatch/layers/GADMadm0.scala | 2 +- src/main/scala/org/globalforestwatch/layers/GADMadm1.scala | 2 +- 
src/main/scala/org/globalforestwatch/layers/GADMadm2.scala | 2 +- .../globalforestwatch/summarystats/afi/AFiGridSources.scala | 6 +++--- .../org/globalforestwatch/summarystats/afi/AFiTile.scala | 6 +++--- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala b/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala index a4e33dab..5df1c996 100644 --- a/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala +++ b/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala @@ -2,7 +2,7 @@ package org.globalforestwatch.layers import org.globalforestwatch.grids.GridTile -case class GADMadm0(gridTile: GridTile, kwargs: Map[String, Any]) +case class GadmAdm0(gridTile: GridTile, kwargs: Map[String, Any]) extends StringLayer with OptionalILayer { diff --git a/src/main/scala/org/globalforestwatch/layers/GADMadm1.scala b/src/main/scala/org/globalforestwatch/layers/GADMadm1.scala index 703581d2..835b5bdf 100644 --- a/src/main/scala/org/globalforestwatch/layers/GADMadm1.scala +++ b/src/main/scala/org/globalforestwatch/layers/GADMadm1.scala @@ -2,7 +2,7 @@ package org.globalforestwatch.layers import org.globalforestwatch.grids.GridTile -case class GADMadm1(gridTile: GridTile, kwargs: Map[String, Any]) +case class GadmAdm1(gridTile: GridTile, kwargs: Map[String, Any]) extends IntegerLayer with OptionalILayer { diff --git a/src/main/scala/org/globalforestwatch/layers/GADMadm2.scala b/src/main/scala/org/globalforestwatch/layers/GADMadm2.scala index da938fb3..2fc94035 100644 --- a/src/main/scala/org/globalforestwatch/layers/GADMadm2.scala +++ b/src/main/scala/org/globalforestwatch/layers/GADMadm2.scala @@ -2,7 +2,7 @@ package org.globalforestwatch.layers import org.globalforestwatch.grids.GridTile -case class GADMadm2(gridTile: GridTile, kwargs: Map[String, Any]) +case class GadmAdm2(gridTile: GridTile, kwargs: Map[String, Any]) extends IntegerLayer with OptionalILayer { diff --git 
a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala index 421b934f..c9f3094a 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiGridSources.scala @@ -13,9 +13,9 @@ case class AFiGridSources(gridTile: GridTile, kwargs: Map[String, Any]) extends val treeCoverLoss: TreeCoverLoss = TreeCoverLoss(gridTile, kwargs) val sbtnNaturalForest: SBTNNaturalForests = SBTNNaturalForests(gridTile, kwargs) val negligibleRisk: NegligibleRisk = NegligibleRisk(gridTile, kwargs) - val gadmAdm0: GADMadm0 = GADMadm0(gridTile, kwargs) - val gadmAdm1: GADMadm1 = GADMadm1(gridTile, kwargs) - val gadmAdm2: GADMadm2 = GADMadm2(gridTile, kwargs) + val gadmAdm0: GadmAdm0 = GadmAdm0(gridTile, kwargs) + val gadmAdm1: GadmAdm1 = GadmAdm1(gridTile, kwargs) + val gadmAdm2: GadmAdm2 = GadmAdm2(gridTile, kwargs) def readWindow( windowKey: SpatialKey, diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala index 3dabef95..af8a8141 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiTile.scala @@ -12,9 +12,9 @@ case class AFiTile( treeCoverLoss: TreeCoverLoss#ITile, sbtnNaturalForest: SBTNNaturalForests#OptionalITile, negligibleRisk: NegligibleRisk#OptionalITile, - gadmAdm0: GADMadm0#OptionalITile, - gadmAdm1: GADMadm1#OptionalITile, - gadmAdm2: GADMadm2#OptionalITile + gadmAdm0: GadmAdm0#OptionalITile, + gadmAdm1: GadmAdm1#OptionalITile, + gadmAdm2: GadmAdm2#OptionalITile ) extends CellGrid[Int] { def cellType: CellType = treeCoverLoss.cellType From a21c589feb52db81a0693131c294085d449aaf52 Mon Sep 17 00:00:00 2001 From: Solomon Negusse Date: Fri, 18 Aug 2023 17:10:13 -0500 Subject: [PATCH 20/33] aggregate results from 
nodes --- .../summarystats/afi/AFiAnalysis.scala | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index 1704f3c6..e10c75d1 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -46,7 +46,19 @@ object AFiAnalysis extends SummaryAnalysis { val summaryDF = AFiDF .getFeatureDataFrame(summaryRDD, spark) - .withColumn("gadm_id", when(col("location_id") =!= -1, lit("")).otherwise(col("gadm_id"))) + .withColumn( + "gadm_id", when(col("location_id") =!= -1|| col("gadm_id").contains("null"), lit("") ).otherwise(col("gadm_id")) + ) + .groupBy($"list_id", $"location_id", $"gadm_id") + .agg( + sum("natural_land_extent").alias("natural_land_extent"), + sum("tree_cover_loss_area").alias("tree_cover_loss_area"), + sum("negligible_risk_area").alias("negligible_risk_area"), + sum("total_area").alias("total_area"), + max("status_code").alias("status_code") + ) + .withColumn("location_error", lit("")) + val gadmAgg = summaryDF .filter($"location_id" === -1) From 240a8388aecd34581a6862b706637ffa51e9f35f Mon Sep 17 00:00:00 2001 From: Solomon Negusse Date: Fri, 18 Aug 2023 17:41:38 -0500 Subject: [PATCH 21/33] Rename GADMadm1.scala to GADMAdm1.scala Rename GADMadm0.scala to GADMAdm0.scala Rename GADMadm2.scala to GADMAdm2.scala remaining gadm file name changes --- .../globalforestwatch/layers/{GADMadm0.scala => GadmAdm0.scala} | 2 +- .../globalforestwatch/layers/{GADMadm1.scala => GadmAdm1.scala} | 2 +- .../globalforestwatch/layers/{GADMadm2.scala => GadmAdm2.scala} | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename src/main/scala/org/globalforestwatch/layers/{GADMadm0.scala => GadmAdm0.scala} (99%) rename src/main/scala/org/globalforestwatch/layers/{GADMadm1.scala => GadmAdm1.scala} (98%) 
rename src/main/scala/org/globalforestwatch/layers/{GADMadm2.scala => GadmAdm2.scala} (98%) diff --git a/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala b/src/main/scala/org/globalforestwatch/layers/GadmAdm0.scala similarity index 99% rename from src/main/scala/org/globalforestwatch/layers/GADMadm0.scala rename to src/main/scala/org/globalforestwatch/layers/GadmAdm0.scala index 5df1c996..cd881456 100644 --- a/src/main/scala/org/globalforestwatch/layers/GADMadm0.scala +++ b/src/main/scala/org/globalforestwatch/layers/GadmAdm0.scala @@ -263,4 +263,4 @@ case class GadmAdm0(gridTile: GridTile, kwargs: Map[String, Any]) case _ => "Unknown" } } - \ No newline at end of file + diff --git a/src/main/scala/org/globalforestwatch/layers/GADMadm1.scala b/src/main/scala/org/globalforestwatch/layers/GadmAdm1.scala similarity index 98% rename from src/main/scala/org/globalforestwatch/layers/GADMadm1.scala rename to src/main/scala/org/globalforestwatch/layers/GadmAdm1.scala index 835b5bdf..78ab46bf 100644 --- a/src/main/scala/org/globalforestwatch/layers/GADMadm1.scala +++ b/src/main/scala/org/globalforestwatch/layers/GadmAdm1.scala @@ -15,4 +15,4 @@ case class GadmAdm1(gridTile: GridTile, kwargs: Map[String, Any]) } - \ No newline at end of file + diff --git a/src/main/scala/org/globalforestwatch/layers/GADMadm2.scala b/src/main/scala/org/globalforestwatch/layers/GadmAdm2.scala similarity index 98% rename from src/main/scala/org/globalforestwatch/layers/GADMadm2.scala rename to src/main/scala/org/globalforestwatch/layers/GadmAdm2.scala index 2fc94035..a7daa7bf 100644 --- a/src/main/scala/org/globalforestwatch/layers/GADMadm2.scala +++ b/src/main/scala/org/globalforestwatch/layers/GadmAdm2.scala @@ -15,4 +15,4 @@ case class GadmAdm2(gridTile: GridTile, kwargs: Map[String, Any]) } - \ No newline at end of file + From 62b20c79279af442d4dfa60515ff827280455fe2 Mon Sep 17 00:00:00 2001 From: Solomon Negusse Date: Fri, 18 Aug 2023 18:59:33 -0500 Subject: [PATCH 22/33] 
aggregate errors --- .../summarystats/afi/AFiAnalysis.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index e10c75d1..77602d26 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -1,5 +1,5 @@ package org.globalforestwatch.summarystats.afi -import org.apache.spark.sql.functions.{col, lit, when, sum, max} +import org.apache.spark.sql.functions.{col, lit, when, sum, max, concat_ws, collect_list} import cats.data.Validated.{Invalid, Valid} import cats.data.{NonEmptyList, Validated} import geotrellis.vector.{Feature, Geometry} @@ -55,9 +55,9 @@ object AFiAnalysis extends SummaryAnalysis { sum("tree_cover_loss_area").alias("tree_cover_loss_area"), sum("negligible_risk_area").alias("negligible_risk_area"), sum("total_area").alias("total_area"), - max("status_code").alias("status_code") + max("status_code").alias("status_code"), + concat_ws(", ", collect_list(when(col("location_error").isNotNull && col("location_error") =!= "", col("location_error")))).alias("location_error") ) - .withColumn("location_error", lit("")) val gadmAgg = summaryDF @@ -68,10 +68,10 @@ object AFiAnalysis extends SummaryAnalysis { sum("tree_cover_loss_area").alias("tree_cover_loss_area"), sum("negligible_risk_area").alias("negligible_risk_area"), sum("total_area").alias("total_area"), - max("status_code").alias("status_code") + max("status_code").alias("status_code"), + concat_ws(", ", collect_list(when(col("location_error").isNotNull && col("location_error") =!= "", col("location_error")))).alias("location_error") ) .withColumn("gadm_id", lit("")) - .withColumn("location_error", lit("")) .withColumn("location_id", lit(-1)) val combinedDF = summaryDF.unionByName(gadmAgg) From 
97f3e4761334c530b9175edea97fb703679ecca5 Mon Sep 17 00:00:00 2001 From: Dan Scales Date: Mon, 21 Aug 2023 17:34:05 -0700 Subject: [PATCH 23/33] Fix description for contextual_feature_* options. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f03fd40c..6ff8421b 100644 --- a/README.md +++ b/README.md @@ -214,8 +214,8 @@ The following options are supported: |split_features |flag |all |Split input features along 1x1 degree grid | |no_output_path_suffix|flag |all |Do not autogenerate output path suffix at runtime | |intermediate_list_source|flag |`forest_change_diagnostic` analysis |URI of intermediate list results in TSV format | -|contextual_feature_type|flag |`gfwpro_dashboard` analysis |URI of intermediate list results in TSV format | -|contextual_feature_url|flag |`gfwpro_dashboard` analysis |Type of contextual features | +|contextual_feature_type|flag |`gfwpro_dashboard` analysis |type of contextual feature | +|contextual_feature_url|flag |`gfwpro_dashboard` analysis |URI of contextual feature in TSV format | ## Inventory From 845228f6cf75fdb86e96a2820c12bbad89cf1254 Mon Sep 17 00:00:00 2001 From: Dan Scales Date: Mon, 21 Aug 2023 17:43:56 -0700 Subject: [PATCH 24/33] Remove extra space indicated by codacy. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6ff8421b..0287ee93 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ Supported input features are: * GFW Pro -### Integrated Alerts +### Integrated Alerts Integrated Alerts computes a combination of deforestation alerts based on GLAD-L, GLAD-S2, and RADD systems. It is used to compute these alerts for GFW. 
It optionally From c1e25cea7e58ee12a221c82f67b7de951dd30184 Mon Sep 17 00:00:00 2001 From: Solomon Negusse Date: Tue, 22 Aug 2023 14:09:11 -0500 Subject: [PATCH 25/33] incapsulate dataframe aggregation in a function --- .../summarystats/afi/AFiAnalysis.scala | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index 77602d26..ce269a36 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -14,6 +14,7 @@ import org.globalforestwatch.ValidatedWorkflow import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession import org.apache.spark.storage.StorageLevel +import org.apache.spark.sql.RelationalGroupedDataset object AFiAnalysis extends SummaryAnalysis { @@ -44,33 +45,20 @@ object AFiAnalysis extends SummaryAnalysis { // TODO somehow convert AFiSummary to AFiData import spark.implicits._ - val summaryDF = AFiDF - .getFeatureDataFrame(summaryRDD, spark) - .withColumn( - "gadm_id", when(col("location_id") =!= -1|| col("gadm_id").contains("null"), lit("") ).otherwise(col("gadm_id")) - ) - .groupBy($"list_id", $"location_id", $"gadm_id") - .agg( - sum("natural_land_extent").alias("natural_land_extent"), - sum("tree_cover_loss_area").alias("tree_cover_loss_area"), - sum("negligible_risk_area").alias("negligible_risk_area"), - sum("total_area").alias("total_area"), - max("status_code").alias("status_code"), - concat_ws(", ", collect_list(when(col("location_error").isNotNull && col("location_error") =!= "", col("location_error")))).alias("location_error") - ) - + val summaryDF = AFiAnalysis.aggregateResults( + AFiDF + .getFeatureDataFrame(summaryRDD, spark) + .withColumn( + "gadm_id", when(col("location_id") =!= -1|| col("gadm_id").contains("null"), lit("") 
).otherwise(col("gadm_id")) + ) + .groupBy($"list_id", $"location_id", $"gadm_id") + ) - val gadmAgg = summaryDF + val gadmAgg = AFiAnalysis.aggregateResults( + summaryDF .filter($"location_id" === -1) - .groupBy($"list_id") - .agg( - sum("natural_land_extent").alias("natural_land_extent"), - sum("tree_cover_loss_area").alias("tree_cover_loss_area"), - sum("negligible_risk_area").alias("negligible_risk_area"), - sum("total_area").alias("total_area"), - max("status_code").alias("status_code"), - concat_ws(", ", collect_list(when(col("location_error").isNotNull && col("location_error") =!= "", col("location_error")))).alias("location_error") - ) + .groupBy($"list_id"), + ) .withColumn("gadm_id", lit("")) .withColumn("location_id", lit(-1)) @@ -85,4 +73,15 @@ object AFiAnalysis extends SummaryAnalysis { val runOutputUrl: String = getOutputUrl(kwargs) AFiExport.export(featureType, resultsDF, runOutputUrl, kwargs) } + + private def aggregateResults(group: RelationalGroupedDataset) = { + group.agg( + sum("natural_land_extent").alias("natural_land_extent"), + sum("tree_cover_loss_area").alias("tree_cover_loss_area"), + sum("negligible_risk_area").alias("negligible_risk_area"), + sum("total_area").alias("total_area"), + max("status_code").alias("status_code"), + concat_ws(", ", collect_list(when(col("location_error").isNotNull && col("location_error") =!= "", col("location_error")))).alias("location_error") + ) + } } From 4db5c2946745c0245d58e3a4224cd23848c639e0 Mon Sep 17 00:00:00 2001 From: Dan Scales Date: Mon, 28 Aug 2023 11:18:47 -0700 Subject: [PATCH 26/33] Revert TCL 2022 changes for now Front-end not ready for the new data yet. 
--- src/main/resources/raster-catalog-pro.json | 4 ++-- .../ForestChangeDiagnosticAnalysis.scala | 6 ------ .../ForestChangeDiagnosticDataLossYearly.scala | 1 - .../ForestChangeDiagnosticDataValueYearly.scala | 1 - ...part-00000-0a0fc858-23ba-4edd-9530-0432fb3a0ea0-c000.csv | 2 ++ ...part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv | 2 -- 6 files changed, 4 insertions(+), 12 deletions(-) create mode 100644 src/test/resources/palm-32-fcd-output/part-00000-0a0fc858-23ba-4edd-9530-0432fb3a0ea0-c000.csv delete mode 100644 src/test/resources/palm-32-fcd-output/part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv diff --git a/src/main/resources/raster-catalog-pro.json b/src/main/resources/raster-catalog-pro.json index 81cf6a12..f2d4e8d2 100755 --- a/src/main/resources/raster-catalog-pro.json +++ b/src/main/resources/raster-catalog-pro.json @@ -190,7 +190,7 @@ }, { "name":"umd_tree_cover_loss", - "source_uri":"s3://gfw-data-lake/umd_tree_cover_loss/v1.10/raster/epsg-4326/{grid_size}/{row_count}/year/gdal-geotiff/{tile_id}.tif" + "source_uri":"s3://gfw-data-lake/umd_tree_cover_loss/v1.9/raster/epsg-4326/{grid_size}/{row_count}/year/gdal-geotiff/{tile_id}.tif" }, { "name":"gfw_managed_forests", @@ -297,4 +297,4 @@ "source_uri": "s3://gfw-data-lake/arg_native_forest_land_plan/v202212/raster/epsg-4326/{grid_size}/{row_count}/category/gdal-geotiff/{tile_id}.tif" } ] -} \ No newline at end of file +} diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticAnalysis.scala index cb14b698..44e67a2b 100755 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticAnalysis.scala @@ -91,10 +91,6 @@ object ForestChangeDiagnosticAnalysis extends 
SummaryAnalysis { } else { data.copy( commodity_threat_fires = fire.getOrElse(ForestChangeDiagnosticDataLossYearly.empty), - // Soy is planted late in year (Sept/Oct) and harvested in - // March. So, the most recent data relates to soy planted late - // in previous year. So, we should only intersect with tree - // cover loss from previous year. tree_cover_loss_soy_yearly = data.tree_cover_loss_soy_yearly.limitToMaxYear(2021) ) } @@ -201,8 +197,6 @@ object ForestChangeDiagnosticAnalysis extends SummaryAnalysis { usingIndex = true ) - // This fire data is an input to the palm risk tool, so limit data to 2021 to sync - // with the palm risk tool. joinedRDD.rdd .map { case (poly, points) => val fid = poly.getUserData.asInstanceOf[FeatureId] diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataLossYearly.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataLossYearly.scala index 189dfcd9..9385887f 100755 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataLossYearly.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataLossYearly.scala @@ -56,7 +56,6 @@ object ForestChangeDiagnosticDataLossYearly { 2019 -> 0, 2020 -> 0, 2021 -> 0, - 2022 -> 0, ) ) diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataValueYearly.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataValueYearly.scala index 449b0122..fb3386e1 100755 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataValueYearly.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataValueYearly.scala @@ -78,7 +78,6 @@ object ForestChangeDiagnosticDataValueYearly { 2019 -> 
0, 2020 -> 0, 2021 -> 0, - 2022 -> 0, ) ) diff --git a/src/test/resources/palm-32-fcd-output/part-00000-0a0fc858-23ba-4edd-9530-0432fb3a0ea0-c000.csv b/src/test/resources/palm-32-fcd-output/part-00000-0a0fc858-23ba-4edd-9530-0432fb3a0ea0-c000.csv new file mode 100644 index 00000000..e4dc0cc2 --- /dev/null +++ b/src/test/resources/palm-32-fcd-output/part-00000-0a0fc858-23ba-4edd-9530-0432fb3a0ea0-c000.csv @@ -0,0 +1,2 @@ +list_id location_id status_code location_error tree_cover_loss_total_yearly tree_cover_loss_primary_forest_yearly tree_cover_loss_peat_yearly tree_cover_loss_intact_forest_yearly tree_cover_loss_protected_areas_yearly tree_cover_loss_arg_otbn_yearly tree_cover_loss_sea_landcover_yearly tree_cover_loss_idn_landcover_yearly tree_cover_loss_soy_yearly tree_cover_loss_idn_legal_yearly tree_cover_loss_idn_forest_moratorium_yearly tree_cover_loss_prodes_amazon_yearly tree_cover_loss_prodes_cerrado_yearly tree_cover_loss_prodes_amazon_wdpa_yearly tree_cover_loss_prodes_cerrado_wdpa_yearly tree_cover_loss_prodes_amazon_primary_forest_yearly tree_cover_loss_prodes_cerrado_primary_forest_yearly tree_cover_loss_brazil_biomes_yearly tree_cover_extent_total tree_cover_extent_primary_forest tree_cover_extent_protected_areas tree_cover_extent_peat tree_cover_extent_intact_forest natural_habitat_primary natural_habitat_intact_forest total_area protected_areas_area peat_area arg_otbn_area brazil_biomes idn_legal_area sea_landcover_area idn_landcover_area idn_forest_moratorium_area south_america_presence legal_amazon_presence brazil_biomes_presence cerrado_biome_presence southeast_asia_presence indonesia_presence argentina_presence commodity_value_forest_extent commodity_value_peat commodity_value_protected_areas commodity_threat_deforestation commodity_threat_peat commodity_threat_protected_areas commodity_threat_fires +1 31 2 
{"2001":1021.7622,"2002":851.014,"2003":310.1835,"2004":2169.8398,"2005":2325.3843,"2006":4162.4968,"2007":2968.7863,"2008":4015.4403,"2009":2002.9194,"2010":1173.7001,"2011":1703.6902,"2012":2838.0498,"2013":1841.7568,"2014":2468.7732,"2015":2028.9672,"2016":3344.8135,"2017":1026.7609,"2018":525.5327,"2019":618.7052,"2020":924.699,"2021":857.8225} {"2001":154.8617,"2002":306.7253,"2003":92.3781,"2004":717.7405,"2005":1202.6952,"2006":1831.5766,"2007":1668.2764,"2008":1753.2317,"2009":797.282,"2010":454.5023,"2011":872.3613,"2012":1251.8543,"2013":1083.6799,"2014":1290.2177,"2015":1360.2574,"2016":2313.5001,"2017":286.2809,"2018":159.8557,"2019":162.3929,"2020":134.2652,"2021":167.4697} {"2001":557.4251,"2002":236.2539,"2003":71.8566,"2004":741.25,"2005":957.52,"2006":1229.3335,"2007":1037.5018,"2008":891.235,"2009":486.4665,"2010":363.5759,"2011":411.9212,"2012":1078.9246,"2013":862.5621,"2014":974.783,"2015":942.4571,"2016":1472.8429,"2017":211.3403,"2018":144.7173,"2019":148.7917,"2020":142.3323,"2021":122.7372} {} {"2001":42.2692,"2002":228.1732,"2003":11.3743,"2004":3.9196,"2005":1.614,"2006":15.3711,"2007":4.4576,"2008":2.8437,"2009":4.765,"2010":7.9931,"2011":10.7597,"2012":8.1466,"2013":0.1537,"2014":8.5307,"2015":18.6758,"2016":139.2616,"2017":10.7596,"2018":0.3843,"2019":0.2306,"2020":0.0,"2021":0.0769} {} {"Rubber plantation":{"2001":3.0745,"2002":16.5256,"2003":36.0493,"2004":66.1791,"2005":73.4812,"2006":25.9797,"2007":5.9184,"2008":56.571,"2009":47.7317,"2010":33.3581,"2011":21.9825,"2012":52.9583,"2013":11.9137,"2014":42.2742,"2015":34.2038,"2016":63.4883,"2017":10.6839,"2018":24.4423,"2019":22.1363,"2020":10.4533,"2021":25.826},"Secondary 
forest":{"2001":240.1012,"2002":352.6874,"2003":51.186,"2004":522.8408,"2005":879.6014,"2006":1310.6826,"2007":981.6686,"2008":756.8744,"2009":359.2934,"2010":232.485,"2011":575.4717,"2012":1110.4372,"2013":787.2514,"2014":772.2979,"2015":966.528,"2016":1571.8466,"2017":149.9382,"2018":89.3794,"2019":136.8781,"2020":121.8915,"2021":68.6318},"Agriculture":{"2001":3.151,"2002":9.1452,"2003":5.4563,"2004":53.8715,"2005":30.3561,"2006":22.9009,"2007":6.5323,"2008":10.9893,"2009":159.7649,"2010":38.7323,"2011":100.4403,"2012":104.3592,"2013":15.3698,"2014":35.8124,"2015":19.6734,"2016":38.1942,"2017":19.2886,"2018":10.5282,"2019":11.2197,"2020":7.9922,"2021":12.1419},"Oil palm plantation":{"2001":389.5357,"2002":222.339,"2003":103.9797,"2004":96.4524,"2005":67.8614,"2006":368.7244,"2007":440.2632,"2008":428.9814,"2009":151.7946,"2010":184.5942,"2011":113.5139,"2012":263.0128,"2013":147.9443,"2014":88.3878,"2015":58.1061,"2016":70.7105,"2017":44.5029,"2018":31.2823,"2019":233.0475,"2020":526.052,"2021":395.9972},"Swamp":{"2001":265.2372,"2002":112.4372,"2003":38.2726,"2004":648.495,"2005":548.2747,"2006":855.4703,"2007":1129.3025,"2008":2086.08,"2009":484.4962,"2010":300.1085,"2011":526.378,"2012":478.8799,"2013":482.4034,"2014":742.3953,"2015":446.518,"2016":539.8938,"2017":620.8959,"2018":204.1215,"2019":105.5176,"2020":122.2736,"2021":197.9767},"Settlements":{"2001":0.1537,"2002":0.9992,"2003":0.0,"2004":0.6918,"2005":0.1537,"2006":1.1529,"2007":1.1529,"2008":0.538,"2009":1.0761,"2010":0.8455,"2011":1.1529,"2012":0.8454,"2013":0.0,"2014":0.6918,"2015":0.1537,"2016":0.2306,"2017":0.3843,"2018":0.0,"2019":0.1537,"2020":1.1529,"2021":1.3067},"Grassland/shrub":{"2001":59.3337,"2002":89.231,"2003":37.5821,"2004":445.7701,"2005":432.4583,"2006":514.3995,"2007":235.9463,"2008":500.7963,"2009":334.6362,"2010":269.6786,"2011":186.2981,"2012":378.8895,"2013":330.4736,"2014":424.3189,"2015":165.2413,"2016":151.5619,"2017":77.7013,"2018":84.6964,"2019":29.59,"2020":91.3842,"2021":
53.8004},"Primary forest":{"2001":41.1934,"2002":30.6653,"2003":13.68,"2004":98.6793,"2005":209.8123,"2006":379.429,"2007":115.8962,"2008":96.2208,"2009":368.2156,"2010":47.8819,"2011":42.0413,"2012":228.795,"2013":26.1305,"2014":255.8481,"2015":270.3755,"2016":823.8133,"2017":81.5399,"2018":47.9595,"2019":64.4845,"2020":22.7495,"2021":71.7856},"Water bodies":{"2001":0.8454,"2002":0.0768,"2003":0.0,"2004":0.1537,"2005":0.0,"2006":0.0,"2007":0.0769,"2008":0.0,"2009":0.0,"2010":0.1537,"2011":0.2306,"2012":0.6916,"2013":0.6917,"2014":0.6148,"2015":0.0,"2016":0.2306,"2017":0.0768,"2018":0.0,"2019":0.0,"2020":0.0,"2021":0.2305},"Mixed tree crops":{"2001":19.1363,"2002":16.9073,"2003":23.9776,"2004":236.7062,"2005":83.3852,"2006":683.7575,"2007":52.029,"2008":78.3891,"2009":95.9108,"2010":65.8624,"2011":136.1808,"2012":219.1809,"2013":39.5784,"2014":106.132,"2015":68.1674,"2016":84.8439,"2017":21.749,"2018":33.1229,"2019":15.6777,"2020":20.7498,"2021":30.1257}} {"Bare land":{"2001":3.8428,"2002":35.2766,"2003":5.3801,"2004":14.4491,"2005":17.6005,"2006":39.8116,"2007":99.1447,"2008":141.5687,"2009":59.9482,"2010":20.7508,"2011":136.3415,"2012":129.3478,"2013":94.2991,"2014":83.0794,"2015":280.0642,"2016":735.1371,"2017":28.9729,"2018":36.1198,"2019":8.3774,"2020":7.1477,"2021":8.0699},"Mining":{"2001":7.301,"2002":2.7666,"2003":5.2258,"2004":11.9889,"2005":15.2172,"2006":9.1456,"2007":7.6082,"2008":34.8914,"2009":16.9072,"2010":8.9918,"2011":12.4502,"2012":29.5112,"2013":1.0759,"2014":16.8304,"2015":2.5362,"2016":1.9982,"2017":1.7676,"2018":0.7685,"2019":0.3074,"2020":0.4611,"2021":1.7676},"Settlement":{"2001":30.2802,"2002":84.4598,"2003":6.7627,"2004":5.226,"2005":1.9982,"2006":15.5239,"2007":5.3029,"2008":146.7178,"2009":9.1456,"2010":5.9944,"2011":8.2999,"2012":20.9038,"2013":6.4557,"2014":10.4519,"2015":14.0641,"2016":20.5962,"2017":9.6834,"2018":5.9946,"2019":5.6871,"2020":7.5318,"2021":7.5314},"Secondary 
forest":{"2001":14.8329,"2002":34.5077,"2003":10.3753,"2004":63.4026,"2005":86.5381,"2006":58.5628,"2007":81.0051,"2008":222.8806,"2009":92.1507,"2010":46.96,"2011":105.6723,"2012":258.1458,"2013":358.5935,"2014":604.2224,"2015":692.3818,"2016":1208.3837,"2017":259.4575,"2018":110.7479,"2019":151.2503,"2020":92.763,"2021":119.8204},"Agriculture":{"2001":87.6883,"2002":42.4224,"2003":18.9819,"2004":289.878,"2005":266.1325,"2006":746.3828,"2007":376.2672,"2008":177.9118,"2009":282.8805,"2010":120.1185,"2011":271.2771,"2012":638.4726,"2013":155.6217,"2014":248.7641,"2015":220.5643,"2016":382.8723,"2017":105.1299,"2018":55.5624,"2019":44.6495,"2020":56.5613,"2021":42.3446},"Swamp":{"2001":110.6023,"2002":161.6235,"2003":30.1276,"2004":349.3156,"2005":345.4651,"2006":346.3096,"2007":162.935,"2008":218.7309,"2009":146.9478,"2010":95.2199,"2011":132.4202,"2012":382.8885,"2013":159.6257,"2014":317.4138,"2015":296.5912,"2016":418.5593,"2017":115.2812,"2018":74.3201,"2019":85.1564,"2020":78.4694,"2021":128.1948},"Grassland/shrub":{"2001":4.9185,"2002":20.2891,"2003":11.7584,"2004":38.7334,"2005":22.748,"2006":135.7978,"2007":15.2937,"2008":74.7011,"2009":35.6594,"2010":20.4429,"2011":50.7993,"2012":105.518,"2013":11.4509,"2014":46.2648,"2015":55.7949,"2016":62.8662,"2017":264.4497,"2018":34.5065,"2019":9.8372,"2020":5.1492,"2021":6.9934},"Estate crop plantation":{"2001":759.8369,"2002":469.6682,"2003":221.0338,"2004":1396.0776,"2005":1569.454,"2006":2808.3496,"2007":2218.0015,"2008":2990.2754,"2009":1359.0495,"2010":854.7606,"2011":983.3553,"2012":1269.5731,"2013":1053.2509,"2014":1138.9796,"2015":466.1251,"2016":509.328,"2017":239.7129,"2018":205.5913,"2019":312.8252,"2020":676.3081,"2021":539.8723},"Body of 
water":{"2001":2.4593,"2002":0.0,"2003":0.538,"2004":0.7685,"2005":0.2306,"2006":2.6132,"2007":3.228,"2008":7.7625,"2009":0.2306,"2010":0.4611,"2011":3.0743,"2012":3.689,"2013":1.3834,"2014":2.7668,"2015":0.8454,"2016":5.0725,"2017":2.3057,"2018":1.9215,"2019":0.6148,"2020":0.3074,"2021":3.228}} {} {"Other Utilization Area":{"2001":712.0267,"2002":482.1867,"2003":221.5682,"2004":1414.5116,"2005":1126.5942,"2006":2837.7298,"2007":1853.8397,"2008":3013.7624,"2009":1165.5631,"2010":833.4598,"2011":1098.1437,"2012":1865.7614,"2013":971.0994,"2014":1259.078,"2015":657.4037,"2016":999.5492,"2017":622.2031,"2018":279.4429,"2019":376.9137,"2020":705.8893,"2021":595.4311},"Production Forest":{"2001":113.1434,"2002":80.4763,"2003":7.6858,"2004":26.0567,"2005":48.7316,"2006":183.5469,"2007":84.3149,"2008":147.2649,"2009":84.4729,"2010":56.57,"2011":110.1392,"2012":156.3372,"2013":49.1136,"2014":173.4782,"2015":154.1063,"2016":334.2621,"2017":35.1256,"2018":10.4532,"2019":17.4472,"2020":24.9028,"2021":17.5241},"Converted Production Forest":{"2001":151.8635,"2002":60.1778,"2003":69.0172,"2004":724.5834,"2005":1148.2139,"2006":1123.3127,"2007":1023.561,"2008":844.268,"2009":747.8878,"2010":275.2161,"2011":481.5731,"2012":804.1156,"2013":820.0067,"2014":1024.9196,"2015":1197.936,"2016":1866.668,"2017":356.367,"2018":233.3308,"2019":223.499,"2020":193.5994,"2021":241.5624},"Sanctuary Reserves/Nature Conservation Area":{"2001":42.2692,"2002":228.1732,"2003":11.3743,"2004":3.9196,"2005":1.614,"2006":15.3711,"2007":4.4576,"2008":2.8437,"2009":4.765,"2010":7.9931,"2011":10.7597,"2012":8.1466,"2013":0.1537,"2014":8.5307,"2015":18.6758,"2016":139.2616,"2017":10.7596,"2018":0.3843,"2019":0.2306,"2020":0.0,"2021":0.0769}} 
{"2001":85.0014,"2002":248.2325,"2003":18.829,"2004":97.8293,"2005":96.2941,"2006":176.9875,"2007":138.7928,"2008":129.4126,"2009":109.4342,"2010":65.0144,"2011":100.5959,"2012":428.132,"2013":566.3779,"2014":467.2467,"2015":304.2577,"2016":712.6515,"2017":145.3232,"2018":56.2574,"2019":82.8502,"2020":54.0272,"2021":24.2097} {} {} {} {} {} {} {} 76338.8266 34513.678 6014.0986 23301.5138 0.0 34530.8164 0.0 125583.7284 6614.0107 31984.9079 {} {} {"Other Utilization Area":81822.5991,"Production Forest":4848.8827,"Converted Production Forest":28600.5448,"Sanctuary Reserves/Nature Conservation Area":6614.0107} {"Rubber plantation":3542.3364,"Secondary forest":24896.0268,"Agriculture":2763.3729,"Oil palm plantation":24398.5485,"Swamp":29043.6031,"Settlements":851.2415,"Grassland/shrub":22752.6953,"Primary forest":8261.1709,"Water bodies":3133.0348,"Mixed tree crops":5941.6982} {"Bare land":2902.1353,"Mining":1392.3433,"Settlement":5798.0268,"Secondary forest":21966.5842,"Agriculture":9963.5593,"Swamp":7625.7847,"Grassland/shrub":2875.049,"Estate crop plantation":69353.0242,"Body of water":3707.2217} 13342.6717 false false false false true true false {"2002":24579.8084,"2003":24451.4643,"2004":24189.9373,"2005":24132.0676,"2006":23677.7243,"2007":23217.4612,"2008":22247.8139,"2009":21697.0839,"2010":21235.5008,"2011":20791.3746,"2012":20560.122,"2013":20057.1275,"2014":19054.2097,"2015":18742.9578,"2016":18210.5965,"2017":17209.7962,"2018":15452.9753,"2019":15189.6774,"2020":15071.8608,"2021":14916.0005} {"2002":31984.9079,"2003":31984.9079,"2004":31984.9079,"2005":31984.9079,"2006":31984.9079,"2007":31984.9079,"2008":31984.9079,"2009":31984.9079,"2010":31984.9079,"2011":31984.9079,"2012":31984.9079,"2013":31984.9079,"2014":31984.9079,"2015":31984.9079,"2016":31984.9079,"2017":31984.9079,"2018":31984.9079,"2019":31984.9079,"2020":31984.9079,"2021":31984.9079} 
{"2002":6614.0107,"2003":6614.0107,"2004":6614.0107,"2005":6614.0107,"2006":6614.0107,"2007":6614.0107,"2008":6614.0107,"2009":6614.0107,"2010":6614.0107,"2011":6614.0107,"2012":6614.0107,"2013":6614.0107,"2014":6614.0107,"2015":6614.0107,"2016":6614.0107,"2017":6614.0107,"2018":6614.0107,"2019":6614.0107,"2020":6614.0107,"2021":6614.0107} {"2002":389.8711,"2003":319.3967,"2004":512.213,"2005":914.6063,"2006":1429.9104,"2007":1520.3773,"2008":1012.313,"2009":905.7094,"2010":675.3788,"2011":734.247,"2012":1505.9123,"2013":1314.1698,"2014":843.6132,"2015":1533.1615,"2016":2757.6213,"2017":2020.1188,"2018":381.1145,"2019":273.6768,"2020":272.6016,"2021":245.5511} {"2002":13363.7925,"2003":13309.3807,"2004":13472.9171,"2005":13694.9388,"2006":14063.9863,"2007":14195.4089,"2008":13789.4768,"2009":13613.3257,"2010":13540.0855,"2011":13478.2203,"2012":13889.142,"2013":13891.8339,"2014":13604.4896,"2015":14052.9405,"2016":14710.2098,"2017":14207.349,"2018":13390.077,"2019":13385.3927,"2020":13385.8532,"2021":13355.5725} {"2002":222.102,"2003":200.4294,"2004":9.6067,"2005":0.4611,"2006":3.7659,"2007":3.8427,"2008":0.1537,"2009":1.2297,"2010":2.8437,"2011":2.9205,"2012":3.2279,"2013":1.9982,"2014":0.3843,"2015":13.7571,"2016":74.1656,"2017":63.0214,"2018":2.2287,"2019":0.0,"2020":0.0,"2021":0.0} {} diff --git a/src/test/resources/palm-32-fcd-output/part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv b/src/test/resources/palm-32-fcd-output/part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv deleted file mode 100644 index 5d76cb71..00000000 --- a/src/test/resources/palm-32-fcd-output/part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv +++ /dev/null @@ -1,2 +0,0 @@ -list_id location_id status_code location_error tree_cover_loss_total_yearly tree_cover_loss_primary_forest_yearly tree_cover_loss_peat_yearly tree_cover_loss_intact_forest_yearly tree_cover_loss_protected_areas_yearly tree_cover_loss_arg_otbn_yearly tree_cover_loss_sea_landcover_yearly 
tree_cover_loss_idn_landcover_yearly tree_cover_loss_soy_yearly tree_cover_loss_idn_legal_yearly tree_cover_loss_idn_forest_moratorium_yearly tree_cover_loss_prodes_amazon_yearly tree_cover_loss_prodes_cerrado_yearly tree_cover_loss_prodes_amazon_wdpa_yearly tree_cover_loss_prodes_cerrado_wdpa_yearly tree_cover_loss_prodes_amazon_primary_forest_yearly tree_cover_loss_prodes_cerrado_primary_forest_yearly tree_cover_loss_brazil_biomes_yearly tree_cover_extent_total tree_cover_extent_primary_forest tree_cover_extent_protected_areas tree_cover_extent_peat tree_cover_extent_intact_forest natural_habitat_primary natural_habitat_intact_forest total_area protected_areas_area peat_area arg_otbn_area brazil_biomes idn_legal_area sea_landcover_area idn_landcover_area idn_forest_moratorium_area south_america_presence legal_amazon_presence brazil_biomes_presence cerrado_biome_presence southeast_asia_presence indonesia_presence argentina_presence commodity_value_forest_extent commodity_value_peat commodity_value_protected_areas commodity_threat_deforestation commodity_threat_peat commodity_threat_protected_areas commodity_threat_fires -1 31 2 {"2001":1021.7622,"2002":851.014,"2003":310.1835,"2004":2169.8398,"2005":2325.3843,"2006":4162.4968,"2007":2968.7863,"2008":4015.4403,"2009":2002.9194,"2010":1173.7001,"2011":1703.6902,"2012":2838.0498,"2013":1841.7568,"2014":2468.7732,"2015":2028.9672,"2016":3344.8135,"2017":1026.7609,"2018":525.5327,"2019":618.7052,"2020":924.699,"2021":857.8225,"2022":560.0482} {"2001":154.8617,"2002":306.7253,"2003":92.3781,"2004":717.7405,"2005":1202.6952,"2006":1831.5766,"2007":1668.2764,"2008":1753.2317,"2009":797.282,"2010":454.5023,"2011":872.3613,"2012":1251.8543,"2013":1083.6799,"2014":1290.2177,"2015":1360.2574,"2016":2313.5001,"2017":286.2809,"2018":159.8557,"2019":162.3929,"2020":134.2652,"2021":167.4697,"2022":133.6506} 
{"2001":557.4251,"2002":236.2539,"2003":71.8566,"2004":741.25,"2005":957.52,"2006":1229.3335,"2007":1037.5018,"2008":891.235,"2009":486.4665,"2010":363.5759,"2011":411.9212,"2012":1078.9246,"2013":862.5621,"2014":974.783,"2015":942.4571,"2016":1472.8429,"2017":211.3403,"2018":144.7173,"2019":148.7917,"2020":142.3323,"2021":122.7372,"2022":94.914} {} {"2001":42.2692,"2002":228.1732,"2003":11.3743,"2004":3.9196,"2005":1.614,"2006":15.3711,"2007":4.4576,"2008":2.8437,"2009":4.765,"2010":7.9931,"2011":10.7597,"2012":8.1466,"2013":0.1537,"2014":8.5307,"2015":18.6758,"2016":139.2616,"2017":10.7596,"2018":0.3843,"2019":0.2306,"2020":0.0,"2021":0.0769,"2022":0.0} {} {"Rubber plantation":{"2001":3.0745,"2002":16.5256,"2003":36.0493,"2004":66.1791,"2005":73.4812,"2006":25.9797,"2007":5.9184,"2008":56.571,"2009":47.7317,"2010":33.3581,"2011":21.9825,"2012":52.9583,"2013":11.9137,"2014":42.2742,"2015":34.2038,"2016":63.4883,"2017":10.6839,"2018":24.4423,"2019":22.1363,"2020":10.4533,"2021":25.826,"2022":26.1332},"Secondary forest":{"2001":240.1012,"2002":352.6874,"2003":51.186,"2004":522.8408,"2005":879.6014,"2006":1310.6826,"2007":981.6686,"2008":756.8744,"2009":359.2934,"2010":232.485,"2011":575.4717,"2012":1110.4372,"2013":787.2514,"2014":772.2979,"2015":966.528,"2016":1571.8466,"2017":149.9382,"2018":89.3794,"2019":136.8781,"2020":121.8915,"2021":68.6318,"2022":99.681},"Agriculture":{"2001":3.151,"2002":9.1452,"2003":5.4563,"2004":53.8715,"2005":30.3561,"2006":22.9009,"2007":6.5323,"2008":10.9893,"2009":159.7649,"2010":38.7323,"2011":100.4403,"2012":104.3592,"2013":15.3698,"2014":35.8124,"2015":19.6734,"2016":38.1942,"2017":19.2886,"2018":10.5282,"2019":11.2197,"2020":7.9922,"2021":12.1419,"2022":9.2218},"Oil palm 
plantation":{"2001":389.5357,"2002":222.339,"2003":103.9797,"2004":96.4524,"2005":67.8614,"2006":368.7244,"2007":440.2632,"2008":428.9814,"2009":151.7946,"2010":184.5942,"2011":113.5139,"2012":263.0128,"2013":147.9443,"2014":88.3878,"2015":58.1061,"2016":70.7105,"2017":44.5029,"2018":31.2823,"2019":233.0475,"2020":526.052,"2021":395.9972,"2022":105.2237},"Swamp":{"2001":265.2372,"2002":112.4372,"2003":38.2726,"2004":648.495,"2005":548.2747,"2006":855.4703,"2007":1129.3025,"2008":2086.08,"2009":484.4962,"2010":300.1085,"2011":526.378,"2012":478.8799,"2013":482.4034,"2014":742.3953,"2015":446.518,"2016":539.8938,"2017":620.8959,"2018":204.1215,"2019":105.5176,"2020":122.2736,"2021":197.9767,"2022":240.6287},"Settlements":{"2001":0.1537,"2002":0.9992,"2003":0.0,"2004":0.6918,"2005":0.1537,"2006":1.1529,"2007":1.1529,"2008":0.538,"2009":1.0761,"2010":0.8455,"2011":1.1529,"2012":0.8454,"2013":0.0,"2014":0.6918,"2015":0.1537,"2016":0.2306,"2017":0.3843,"2018":0.0,"2019":0.1537,"2020":1.1529,"2021":1.3067,"2022":1.691},"Grassland/shrub":{"2001":59.3337,"2002":89.231,"2003":37.5821,"2004":445.7701,"2005":432.4583,"2006":514.3995,"2007":235.9463,"2008":500.7963,"2009":334.6362,"2010":269.6786,"2011":186.2981,"2012":378.8895,"2013":330.4736,"2014":424.3189,"2015":165.2413,"2016":151.5619,"2017":77.7013,"2018":84.6964,"2019":29.59,"2020":91.3842,"2021":53.8004,"2022":41.7326},"Primary forest":{"2001":41.1934,"2002":30.6653,"2003":13.68,"2004":98.6793,"2005":209.8123,"2006":379.429,"2007":115.8962,"2008":96.2208,"2009":368.2156,"2010":47.8819,"2011":42.0413,"2012":228.795,"2013":26.1305,"2014":255.8481,"2015":270.3755,"2016":823.8133,"2017":81.5399,"2018":47.9595,"2019":64.4845,"2020":22.7495,"2021":71.7856,"2022":5.7642},"Water 
bodies":{"2001":0.8454,"2002":0.0768,"2003":0.0,"2004":0.1537,"2005":0.0,"2006":0.0,"2007":0.0769,"2008":0.0,"2009":0.0,"2010":0.1537,"2011":0.2306,"2012":0.6916,"2013":0.6917,"2014":0.6148,"2015":0.0,"2016":0.2306,"2017":0.0768,"2018":0.0,"2019":0.0,"2020":0.0,"2021":0.2305,"2022":0.1537},"Mixed tree crops":{"2001":19.1363,"2002":16.9073,"2003":23.9776,"2004":236.7062,"2005":83.3852,"2006":683.7575,"2007":52.029,"2008":78.3891,"2009":95.9108,"2010":65.8624,"2011":136.1808,"2012":219.1809,"2013":39.5784,"2014":106.132,"2015":68.1674,"2016":84.8439,"2017":21.749,"2018":33.1229,"2019":15.6777,"2020":20.7498,"2021":30.1257,"2022":29.8185}} {"Bare land":{"2001":3.8428,"2002":35.2766,"2003":5.3801,"2004":14.4491,"2005":17.6005,"2006":39.8116,"2007":99.1447,"2008":141.5687,"2009":59.9482,"2010":20.7508,"2011":136.3415,"2012":129.3478,"2013":94.2991,"2014":83.0794,"2015":280.0642,"2016":735.1371,"2017":28.9729,"2018":36.1198,"2019":8.3774,"2020":7.1477,"2021":8.0699,"2022":2.3824},"Mining":{"2001":7.301,"2002":2.7666,"2003":5.2258,"2004":11.9889,"2005":15.2172,"2006":9.1456,"2007":7.6082,"2008":34.8914,"2009":16.9072,"2010":8.9918,"2011":12.4502,"2012":29.5112,"2013":1.0759,"2014":16.8304,"2015":2.5362,"2016":1.9982,"2017":1.7676,"2018":0.7685,"2019":0.3074,"2020":0.4611,"2021":1.7676,"2022":3.5353},"Settlement":{"2001":30.2802,"2002":84.4598,"2003":6.7627,"2004":5.226,"2005":1.9982,"2006":15.5239,"2007":5.3029,"2008":146.7178,"2009":9.1456,"2010":5.9944,"2011":8.2999,"2012":20.9038,"2013":6.4557,"2014":10.4519,"2015":14.0641,"2016":20.5962,"2017":9.6834,"2018":5.9946,"2019":5.6871,"2020":7.5318,"2021":7.5314,"2022":7.7622},"Secondary 
forest":{"2001":14.8329,"2002":34.5077,"2003":10.3753,"2004":63.4026,"2005":86.5381,"2006":58.5628,"2007":81.0051,"2008":222.8806,"2009":92.1507,"2010":46.96,"2011":105.6723,"2012":258.1458,"2013":358.5935,"2014":604.2224,"2015":692.3818,"2016":1208.3837,"2017":259.4575,"2018":110.7479,"2019":151.2503,"2020":92.763,"2021":119.8204,"2022":109.8252},"Agriculture":{"2001":87.6883,"2002":42.4224,"2003":18.9819,"2004":289.878,"2005":266.1325,"2006":746.3828,"2007":376.2672,"2008":177.9118,"2009":282.8805,"2010":120.1185,"2011":271.2771,"2012":638.4726,"2013":155.6217,"2014":248.7641,"2015":220.5643,"2016":382.8723,"2017":105.1299,"2018":55.5624,"2019":44.6495,"2020":56.5613,"2021":42.3446,"2022":44.4967},"Swamp":{"2001":110.6023,"2002":161.6235,"2003":30.1276,"2004":349.3156,"2005":345.4651,"2006":346.3096,"2007":162.935,"2008":218.7309,"2009":146.9478,"2010":95.2199,"2011":132.4202,"2012":382.8885,"2013":159.6257,"2014":317.4138,"2015":296.5912,"2016":418.5593,"2017":115.2812,"2018":74.3201,"2019":85.1564,"2020":78.4694,"2021":128.1948,"2022":88.1532},"Grassland/shrub":{"2001":4.9185,"2002":20.2891,"2003":11.7584,"2004":38.7334,"2005":22.748,"2006":135.7978,"2007":15.2937,"2008":74.7011,"2009":35.6594,"2010":20.4429,"2011":50.7993,"2012":105.518,"2013":11.4509,"2014":46.2648,"2015":55.7949,"2016":62.8662,"2017":264.4497,"2018":34.5065,"2019":9.8372,"2020":5.1492,"2021":6.9934,"2022":55.8711},"Estate crop plantation":{"2001":759.8369,"2002":469.6682,"2003":221.0338,"2004":1396.0776,"2005":1569.454,"2006":2808.3496,"2007":2218.0015,"2008":2990.2754,"2009":1359.0495,"2010":854.7606,"2011":983.3553,"2012":1269.5731,"2013":1053.2509,"2014":1138.9796,"2015":466.1251,"2016":509.328,"2017":239.7129,"2018":205.5913,"2019":312.8252,"2020":676.3081,"2021":539.8723,"2022":247.7148},"Body of 
water":{"2001":2.4593,"2002":0.0,"2003":0.538,"2004":0.7685,"2005":0.2306,"2006":2.6132,"2007":3.228,"2008":7.7625,"2009":0.2306,"2010":0.4611,"2011":3.0743,"2012":3.689,"2013":1.3834,"2014":2.7668,"2015":0.8454,"2016":5.0725,"2017":2.3057,"2018":1.9215,"2019":0.6148,"2020":0.3074,"2021":3.228,"2022":0.3074}} {} {"Other Utilization Area":{"2001":712.0267,"2002":482.1867,"2003":221.5682,"2004":1414.5116,"2005":1126.5942,"2006":2837.7298,"2007":1853.8397,"2008":3013.7624,"2009":1165.5631,"2010":833.4598,"2011":1098.1437,"2012":1865.7614,"2013":971.0994,"2014":1259.078,"2015":657.4037,"2016":999.5492,"2017":622.2031,"2018":279.4429,"2019":376.9137,"2020":705.8893,"2021":595.4311,"2022":299.8162},"Production Forest":{"2001":113.1434,"2002":80.4763,"2003":7.6858,"2004":26.0567,"2005":48.7316,"2006":183.5469,"2007":84.3149,"2008":147.2649,"2009":84.4729,"2010":56.57,"2011":110.1392,"2012":156.3372,"2013":49.1136,"2014":173.4782,"2015":154.1063,"2016":334.2621,"2017":35.1256,"2018":10.4532,"2019":17.4472,"2020":24.9028,"2021":17.5241,"2022":26.3625},"Converted Production Forest":{"2001":151.8635,"2002":60.1778,"2003":69.0172,"2004":724.5834,"2005":1148.2139,"2006":1123.3127,"2007":1023.561,"2008":844.268,"2009":747.8878,"2010":275.2161,"2011":481.5731,"2012":804.1156,"2013":820.0067,"2014":1024.9196,"2015":1197.936,"2016":1866.668,"2017":356.367,"2018":233.3308,"2019":223.499,"2020":193.5994,"2021":241.5624,"2022":233.5622},"Sanctuary Reserves/Nature Conservation Area":{"2001":42.2692,"2002":228.1732,"2003":11.3743,"2004":3.9196,"2005":1.614,"2006":15.3711,"2007":4.4576,"2008":2.8437,"2009":4.765,"2010":7.9931,"2011":10.7597,"2012":8.1466,"2013":0.1537,"2014":8.5307,"2015":18.6758,"2016":139.2616,"2017":10.7596,"2018":0.3843,"2019":0.2306,"2020":0.0,"2021":0.0769,"2022":0.0}} 
{"2001":85.0014,"2002":248.2325,"2003":18.829,"2004":97.8293,"2005":96.2941,"2006":176.9875,"2007":138.7928,"2008":129.4126,"2009":109.4342,"2010":65.0144,"2011":100.5959,"2012":428.132,"2013":566.3779,"2014":467.2467,"2015":304.2577,"2016":712.6515,"2017":145.3232,"2018":56.2574,"2019":82.8502,"2020":54.0272,"2021":24.2097,"2022":14.7553} {} {} {} {} {} {} {} 76338.8266 34513.678 6014.0986 23301.5138 0.0 34530.8164 0.0 125583.7284 6614.0107 31984.9079 {} {} {"Other Utilization Area":81822.5991,"Production Forest":4848.8827,"Converted Production Forest":28600.5448,"Sanctuary Reserves/Nature Conservation Area":6614.0107} {"Rubber plantation":3542.3364,"Secondary forest":24896.0268,"Agriculture":2763.3729,"Oil palm plantation":24398.5485,"Swamp":29043.6031,"Settlements":851.2415,"Grassland/shrub":22752.6953,"Primary forest":8261.1709,"Water bodies":3133.0348,"Mixed tree crops":5941.6982} {"Bare land":2902.1353,"Mining":1392.3433,"Settlement":5798.0268,"Secondary forest":21966.5842,"Agriculture":9963.5593,"Swamp":7625.7847,"Grassland/shrub":2875.049,"Estate crop plantation":69353.0242,"Body of water":3707.2217} 13342.6717 false false false false true true false {"2002":24579.8084,"2003":24451.4643,"2004":24189.9373,"2005":24132.0676,"2006":23677.7243,"2007":23217.4612,"2008":22247.8139,"2009":21697.0839,"2010":21235.5008,"2011":20791.3746,"2012":20560.122,"2013":20057.1275,"2014":19054.2097,"2015":18742.9578,"2016":18210.5965,"2017":17209.7962,"2018":15452.9753,"2019":15189.6774,"2020":15071.8608,"2021":14916.0005} {"2002":31984.9079,"2003":31984.9079,"2004":31984.9079,"2005":31984.9079,"2006":31984.9079,"2007":31984.9079,"2008":31984.9079,"2009":31984.9079,"2010":31984.9079,"2011":31984.9079,"2012":31984.9079,"2013":31984.9079,"2014":31984.9079,"2015":31984.9079,"2016":31984.9079,"2017":31984.9079,"2018":31984.9079,"2019":31984.9079,"2020":31984.9079,"2021":31984.9079} 
{"2002":6614.0107,"2003":6614.0107,"2004":6614.0107,"2005":6614.0107,"2006":6614.0107,"2007":6614.0107,"2008":6614.0107,"2009":6614.0107,"2010":6614.0107,"2011":6614.0107,"2012":6614.0107,"2013":6614.0107,"2014":6614.0107,"2015":6614.0107,"2016":6614.0107,"2017":6614.0107,"2018":6614.0107,"2019":6614.0107,"2020":6614.0107,"2021":6614.0107} {"2002":389.8711,"2003":319.3967,"2004":512.213,"2005":914.6063,"2006":1429.9104,"2007":1520.3773,"2008":1012.313,"2009":905.7094,"2010":675.3788,"2011":734.247,"2012":1505.9123,"2013":1314.1698,"2014":843.6132,"2015":1533.1615,"2016":2757.6213,"2017":2020.1188,"2018":381.1145,"2019":273.6768,"2020":272.6016,"2021":245.5511} {"2002":13363.7925,"2003":13309.3807,"2004":13472.9171,"2005":13694.9388,"2006":14063.9863,"2007":14195.4089,"2008":13789.4768,"2009":13613.3257,"2010":13540.0855,"2011":13478.2203,"2012":13889.142,"2013":13891.8339,"2014":13604.4896,"2015":14052.9405,"2016":14710.2098,"2017":14207.349,"2018":13390.077,"2019":13385.3927,"2020":13385.8532,"2021":13355.5725} {"2002":222.102,"2003":200.4294,"2004":9.6067,"2005":0.4611,"2006":3.7659,"2007":3.8427,"2008":0.1537,"2009":1.2297,"2010":2.8437,"2011":2.9205,"2012":3.2279,"2013":1.9982,"2014":0.3843,"2015":13.7571,"2016":74.1656,"2017":63.0214,"2018":2.2287,"2019":0.0,"2020":0.0,"2021":0.0} {} From 2725bdf3bacaf511649c8e3823fc4f9bd44b0fd9 Mon Sep 17 00:00:00 2001 From: manukala6 Date: Mon, 28 Aug 2023 16:31:06 -0700 Subject: [PATCH 27/33] GTC-2437 Make columns align with naming convention --- .../summarystats/afi/AFiAnalysis.scala | 14 ++++++------- .../summarystats/afi/AFiData.scala | 20 +++++++++---------- .../summarystats/afi/AFiSummary.scala | 14 +++++-------- 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index ce269a36..742d7cfc 100644 --- 
a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -65,10 +65,10 @@ object AFiAnalysis extends SummaryAnalysis { val combinedDF = summaryDF.unionByName(gadmAgg) val resultsDF = combinedDF .withColumn( - "negligible_risk_percent", - $"negligible_risk_area" / $"total_area" * 100 + "negligible_risk__percent", + $"negligible_risk_area__ha" / $"total_area__ha" * 100 ) - .drop("negligible_risk_area") + .drop("negligible_risk_area__ha") val runOutputUrl: String = getOutputUrl(kwargs) AFiExport.export(featureType, resultsDF, runOutputUrl, kwargs) @@ -76,10 +76,10 @@ object AFiAnalysis extends SummaryAnalysis { private def aggregateResults(group: RelationalGroupedDataset) = { group.agg( - sum("natural_land_extent").alias("natural_land_extent"), - sum("tree_cover_loss_area").alias("tree_cover_loss_area"), - sum("negligible_risk_area").alias("negligible_risk_area"), - sum("total_area").alias("total_area"), + sum("natural_forest__extent").alias("natural_forest__extent"), + sum("natural_forest_loss__ha").alias("natural_forest_loss__ha"), + sum("negligible_risk_area__ha").alias("negligible_risk_area__ha"), + sum("total_area__ha").alias("total_area__ha"), max("status_code").alias("status_code"), concat_ws(", ", collect_list(when(col("location_error").isNotNull && col("location_error") =!= "", col("location_error")))).alias("location_error") ) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala index 7a524b74..dd4b1eb6 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiData.scala @@ -7,26 +7,24 @@ import cats.Semigroup * Note: This case class contains mutable values */ case class AFiData( - var natural_forest_extent: Double, - var natural_forest_loss_area: Double, - var 
tree_cover_loss_area: Double, - var negligible_risk_area: Double, - var total_area: Double + var natural_forest__extent: Double, + var natural_forest_loss__ha: Double, + var negligible_risk_area__ha: Double, + var total_area__ha: Double ) { def merge(other: AFiData): AFiData = { AFiData( - natural_forest_extent + other.natural_forest_extent, - natural_forest_loss_area + other.natural_forest_loss_area, - tree_cover_loss_area + other.tree_cover_loss_area, - negligible_risk_area + other.negligible_risk_area, - total_area + other.total_area + natural_forest__extent + other.natural_forest__extent, + natural_forest_loss__ha + other.natural_forest_loss__ha, + negligible_risk_area__ha + other.negligible_risk_area__ha, + total_area__ha + other.total_area__ha ) } } object AFiData { def empty: AFiData = - AFiData(0, 0, 0, 0, 0) + AFiData(0, 0, 0, 0) implicit val afiDataSemigroup: Semigroup[AFiData] = new Semigroup[AFiData] { diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala index a7106842..78a81e06 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiSummary.scala @@ -53,23 +53,19 @@ object AFiSummary { val groupKey = AFiDataGroup(gadmId) - val summaryData = acc.stats.getOrElse(groupKey, AFiData(0, 0, 0, 0, 0)) - summaryData.total_area += areaHa - - if (lossYear >= 2021) { - summaryData.tree_cover_loss_area += areaHa - } + val summaryData = acc.stats.getOrElse(groupKey, AFiData(0, 0, 0, 0)) + summaryData.total_area__ha += areaHa if (negligibleRisk == "NO") { - summaryData.negligible_risk_area += areaHa + summaryData.negligible_risk_area__ha += areaHa } if (naturalForestCategory == "Natural Forest") { - summaryData.natural_forest_extent += areaHa + summaryData.natural_forest__extent += areaHa } if (lossYear >= 2021 && naturalForestCategory == "Natural Forest") { - 
summaryData.natural_forest_loss_area += areaHa + summaryData.natural_forest_loss__ha += areaHa } val new_stats = acc.stats.updated(groupKey, summaryData) From a54aa8c62a12d20daf731d9c3cee4ad88b20c3bc Mon Sep 17 00:00:00 2001 From: Dan Scales Date: Wed, 30 Aug 2023 14:24:55 -0700 Subject: [PATCH 28/33] Add in sample fire-alert tsv file, so can run forest_change_diagnostic locally Put example in the README of running forest_change_diagnostic locally with the sample files. And remove mention of --limit option, which doesn't exist anymore. --- README.md | 46 +++++++++++++----------- src/test/resources/sample-fire-alert.tsv | 2 ++ 2 files changed, 28 insertions(+), 20 deletions(-) create mode 100644 src/test/resources/sample-fire-alert.tsv diff --git a/README.md b/README.md index 0287ee93..ae8d6d57 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Other outputs from this analysis (loss, gain, biomass, carbon pools, etc.) use t This type of analysis only supports simple features as input. Best used together with the [ArcPY Client](https://github.com/wri/gfw_forest_loss_geotrellis_arcpy_client). ```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain treecoverloss --feature_type feature --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix --tcd 2000 --threshold 0 --threshold 30 --contextual layer is__gfw_plantations --carbon_pools +test:runMain org.globalforestwatch.summarystats.SummaryMain treecoverloss --feature_type feature --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix --tcd 2000 --threshold 0 --threshold 30 --contextual layer is__gfw_plantations --carbon_pools ``` ### Annual Update minimal @@ -53,10 +53,10 @@ For GADM there will also be summary tables with one row per ISO, ADM1, ADM2 and To produce final spreadsheets you will need to add another [post processing step](https://github.com/wri/write_country_stats). 
```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --feature_type wdpa --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --feature_type geostore --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --feature_type feature --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix +test:runMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix +test:runMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --feature_type wdpa --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix +test:runMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --feature_type geostore --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix +test:runMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --feature_type feature --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix ``` ### Carbon Flux Full Standard Model @@ -67,7 +67,7 @@ It also analyzes several contextual layers that are unique to the carbon flux mo It currently only works with GADM features. 
```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain carbonflux --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix +test:runMain org.globalforestwatch.summarystats.SummaryMain carbonflux --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix ``` ### Carbon Flux Sensitivity Analysis @@ -79,8 +79,8 @@ To run this model with the standard flux model output for an analysis using fewe It currently only works with GADM features. ```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain carbon_sensitivity --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix --sensitivity_type sensitivity_analysis_type -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain carbon_sensitivity --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix --sensitivity_type standard +test:runMain org.globalforestwatch.summarystats.SummaryMain carbon_sensitivity --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix --sensitivity_type sensitivity_analysis_type +test:runMain org.globalforestwatch.summarystats.SummaryMain carbon_sensitivity --feature_type gadm --tcl --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix --sensitivity_type standard ``` ### Glad Alerts @@ -99,10 +99,10 @@ Supported input features are * Simple Feature ```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain gladalerts --feature_type gadm --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain gladalerts --feature_type wdpa --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain gladalerts --feature_type geostore --glad --features s3://bucket/prefix/file.tsv --output 
s3://bucket/prefix [--change_only] -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain gladalerts --feature_type feature --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +test:runMain org.globalforestwatch.summarystats.SummaryMain gladalerts --feature_type gadm --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +test:runMain org.globalforestwatch.summarystats.SummaryMain gladalerts --feature_type wdpa --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +test:runMain org.globalforestwatch.summarystats.SummaryMain gladalerts --feature_type geostore --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +test:runMain org.globalforestwatch.summarystats.SummaryMain gladalerts --feature_type feature --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] ``` ### Viirs/ MODIS Fire Alerts @@ -119,10 +119,10 @@ Supported input features are * Simple Feature ```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type gadm --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type wdpa --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type geostore --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source 
s3://bucket/prefix/file.tsv --feature_type feature --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +test:runMain org.globalforestwatch.summarystats.SummaryMain firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type gadm --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +test:runMain org.globalforestwatch.summarystats.SummaryMain firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type wdpa --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +test:runMain org.globalforestwatch.summarystats.SummaryMain firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type geostore --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] +test:runMain org.globalforestwatch.summarystats.SummaryMain firealerts --fire_alert_type MODIS/VIIRS --fire_alert_source s3://bucket/prefix/file.tsv --feature_type feature --glad --features s3://bucket/prefix/file.tsv --output s3://bucket/prefix [--change_only] ``` ### Forest Change Diagnostic @@ -133,8 +133,16 @@ options. The only supported input feature is `gfwpro`. Automatically turns on th `--split_features` option. 
```sbt -sparkSubmitMain org.globalforestwatch.summarystats.SummaryMain forest_change_diagnostic --feature_type gfwpro --features s3://bucket/prefix/file.tsv --fire_alert_source s3://bucket/prefix/file.tsv --output s3://bucket/prefix +test:runMain org.globalforestwatch.summarystats.SummaryMain forest_change_diagnostic --feature_type gfwpro --features s3://bucket/prefix/file.tsv --fire_alert_source s3://bucket/prefix/file.tsv --output s3://bucket/prefix ``` + +There are two test input files available in the source tree, so you can run a sample +forest change diagnostic locally without s3 via a command like: + +```sbt +test:runMain org.globalforestwatch.summarystats.SummaryMain forest_change_diagnostic --split_features --feature_type gfwpro --features src/test/resources/palm-oil-32.tsv --fire_alert_type modis --fire_alert_source src/test/resources/sample-fire-alert.tsv --output testout +``` + ### GFW Pro Dashboard GFW Pro Dashboard computes summary statistics for the GFW Pro Dashboard. It @@ -230,10 +238,8 @@ The following options are supported: ### Local -For local testing input should be limited with `--limit` flag to minimize the time. 
- ```sbt -sbt:geotrellis-wri> test:runMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --features file:/Users/input/ten-by-ten-gadm36/wdpa__10N_010E.tsv --output file:/User/out/summary --limit 10 +sbt:geotrellis-wri> test:runMain org.globalforestwatch.summarystats.SummaryMain annualupdate_minimal --features file:/Users/input/ten-by-ten-gadm36/wdpa__10N_010E.tsv --output file:/User/out/summary ``` ### EMR diff --git a/src/test/resources/sample-fire-alert.tsv b/src/test/resources/sample-fire-alert.tsv new file mode 100644 index 00000000..b768ce84 --- /dev/null +++ b/src/test/resources/sample-fire-alert.tsv @@ -0,0 +1,2 @@ +latitude longitude acq_date acq_time confidence brightness bright_t31 frp +40.115 -83.497 2021-03-08 1845 47 305.5 291.4 4.1 From 876e166b10a8accba5c993d329670e7ff7b08870 Mon Sep 17 00:00:00 2001 From: Dan Scales Date: Thu, 10 Aug 2023 08:43:29 -0700 Subject: [PATCH 29/33] GTC-2483 Update GFW Pro diagnostics with TCL 22 This is the second push, now that the front-end folks are ready for the TCL 22 changes. Moved to version 1.10 of TCL file. Updated the test output file. 
--- src/main/resources/raster-catalog-pro.json | 2 +- .../ForestChangeDiagnosticAnalysis.scala | 6 ++++++ .../ForestChangeDiagnosticDataLossYearly.scala | 1 + .../ForestChangeDiagnosticDataValueYearly.scala | 1 + ...part-00000-0a0fc858-23ba-4edd-9530-0432fb3a0ea0-c000.csv | 2 -- ...part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv | 2 ++ 6 files changed, 11 insertions(+), 3 deletions(-) delete mode 100644 src/test/resources/palm-32-fcd-output/part-00000-0a0fc858-23ba-4edd-9530-0432fb3a0ea0-c000.csv create mode 100644 src/test/resources/palm-32-fcd-output/part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv diff --git a/src/main/resources/raster-catalog-pro.json b/src/main/resources/raster-catalog-pro.json index 69ae0bc1..ee697487 100755 --- a/src/main/resources/raster-catalog-pro.json +++ b/src/main/resources/raster-catalog-pro.json @@ -190,7 +190,7 @@ }, { "name":"umd_tree_cover_loss", - "source_uri":"s3://gfw-data-lake/umd_tree_cover_loss/v1.9/raster/epsg-4326/{grid_size}/{row_count}/year/gdal-geotiff/{tile_id}.tif" + "source_uri":"s3://gfw-data-lake/umd_tree_cover_loss/v1.10/raster/epsg-4326/{grid_size}/{row_count}/year/gdal-geotiff/{tile_id}.tif" }, { "name":"gfw_managed_forests", diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticAnalysis.scala index 44e67a2b..cb14b698 100755 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticAnalysis.scala @@ -91,6 +91,10 @@ object ForestChangeDiagnosticAnalysis extends SummaryAnalysis { } else { data.copy( commodity_threat_fires = fire.getOrElse(ForestChangeDiagnosticDataLossYearly.empty), + // Soy is planted late in year (Sept/Oct) and harvested in + // March. 
So, the most recent data relates to soy planted late + // in previous year. So, we should only intersect with tree + // cover loss from previous year. tree_cover_loss_soy_yearly = data.tree_cover_loss_soy_yearly.limitToMaxYear(2021) ) } @@ -197,6 +201,8 @@ object ForestChangeDiagnosticAnalysis extends SummaryAnalysis { usingIndex = true ) + // This fire data is an input to the palm risk tool, so limit data to 2021 to sync + // with the palm risk tool. joinedRDD.rdd .map { case (poly, points) => val fid = poly.getUserData.asInstanceOf[FeatureId] diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataLossYearly.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataLossYearly.scala index 9385887f..189dfcd9 100755 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataLossYearly.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataLossYearly.scala @@ -56,6 +56,7 @@ object ForestChangeDiagnosticDataLossYearly { 2019 -> 0, 2020 -> 0, 2021 -> 0, + 2022 -> 0, ) ) diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataValueYearly.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataValueYearly.scala index fb3386e1..449b0122 100755 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataValueYearly.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDataValueYearly.scala @@ -78,6 +78,7 @@ object ForestChangeDiagnosticDataValueYearly { 2019 -> 0, 2020 -> 0, 2021 -> 0, + 2022 -> 0, ) ) diff --git a/src/test/resources/palm-32-fcd-output/part-00000-0a0fc858-23ba-4edd-9530-0432fb3a0ea0-c000.csv 
b/src/test/resources/palm-32-fcd-output/part-00000-0a0fc858-23ba-4edd-9530-0432fb3a0ea0-c000.csv deleted file mode 100644 index e4dc0cc2..00000000 --- a/src/test/resources/palm-32-fcd-output/part-00000-0a0fc858-23ba-4edd-9530-0432fb3a0ea0-c000.csv +++ /dev/null @@ -1,2 +0,0 @@ -list_id location_id status_code location_error tree_cover_loss_total_yearly tree_cover_loss_primary_forest_yearly tree_cover_loss_peat_yearly tree_cover_loss_intact_forest_yearly tree_cover_loss_protected_areas_yearly tree_cover_loss_arg_otbn_yearly tree_cover_loss_sea_landcover_yearly tree_cover_loss_idn_landcover_yearly tree_cover_loss_soy_yearly tree_cover_loss_idn_legal_yearly tree_cover_loss_idn_forest_moratorium_yearly tree_cover_loss_prodes_amazon_yearly tree_cover_loss_prodes_cerrado_yearly tree_cover_loss_prodes_amazon_wdpa_yearly tree_cover_loss_prodes_cerrado_wdpa_yearly tree_cover_loss_prodes_amazon_primary_forest_yearly tree_cover_loss_prodes_cerrado_primary_forest_yearly tree_cover_loss_brazil_biomes_yearly tree_cover_extent_total tree_cover_extent_primary_forest tree_cover_extent_protected_areas tree_cover_extent_peat tree_cover_extent_intact_forest natural_habitat_primary natural_habitat_intact_forest total_area protected_areas_area peat_area arg_otbn_area brazil_biomes idn_legal_area sea_landcover_area idn_landcover_area idn_forest_moratorium_area south_america_presence legal_amazon_presence brazil_biomes_presence cerrado_biome_presence southeast_asia_presence indonesia_presence argentina_presence commodity_value_forest_extent commodity_value_peat commodity_value_protected_areas commodity_threat_deforestation commodity_threat_peat commodity_threat_protected_areas commodity_threat_fires -1 31 2 
{"2001":1021.7622,"2002":851.014,"2003":310.1835,"2004":2169.8398,"2005":2325.3843,"2006":4162.4968,"2007":2968.7863,"2008":4015.4403,"2009":2002.9194,"2010":1173.7001,"2011":1703.6902,"2012":2838.0498,"2013":1841.7568,"2014":2468.7732,"2015":2028.9672,"2016":3344.8135,"2017":1026.7609,"2018":525.5327,"2019":618.7052,"2020":924.699,"2021":857.8225} {"2001":154.8617,"2002":306.7253,"2003":92.3781,"2004":717.7405,"2005":1202.6952,"2006":1831.5766,"2007":1668.2764,"2008":1753.2317,"2009":797.282,"2010":454.5023,"2011":872.3613,"2012":1251.8543,"2013":1083.6799,"2014":1290.2177,"2015":1360.2574,"2016":2313.5001,"2017":286.2809,"2018":159.8557,"2019":162.3929,"2020":134.2652,"2021":167.4697} {"2001":557.4251,"2002":236.2539,"2003":71.8566,"2004":741.25,"2005":957.52,"2006":1229.3335,"2007":1037.5018,"2008":891.235,"2009":486.4665,"2010":363.5759,"2011":411.9212,"2012":1078.9246,"2013":862.5621,"2014":974.783,"2015":942.4571,"2016":1472.8429,"2017":211.3403,"2018":144.7173,"2019":148.7917,"2020":142.3323,"2021":122.7372} {} {"2001":42.2692,"2002":228.1732,"2003":11.3743,"2004":3.9196,"2005":1.614,"2006":15.3711,"2007":4.4576,"2008":2.8437,"2009":4.765,"2010":7.9931,"2011":10.7597,"2012":8.1466,"2013":0.1537,"2014":8.5307,"2015":18.6758,"2016":139.2616,"2017":10.7596,"2018":0.3843,"2019":0.2306,"2020":0.0,"2021":0.0769} {} {"Rubber plantation":{"2001":3.0745,"2002":16.5256,"2003":36.0493,"2004":66.1791,"2005":73.4812,"2006":25.9797,"2007":5.9184,"2008":56.571,"2009":47.7317,"2010":33.3581,"2011":21.9825,"2012":52.9583,"2013":11.9137,"2014":42.2742,"2015":34.2038,"2016":63.4883,"2017":10.6839,"2018":24.4423,"2019":22.1363,"2020":10.4533,"2021":25.826},"Secondary 
forest":{"2001":240.1012,"2002":352.6874,"2003":51.186,"2004":522.8408,"2005":879.6014,"2006":1310.6826,"2007":981.6686,"2008":756.8744,"2009":359.2934,"2010":232.485,"2011":575.4717,"2012":1110.4372,"2013":787.2514,"2014":772.2979,"2015":966.528,"2016":1571.8466,"2017":149.9382,"2018":89.3794,"2019":136.8781,"2020":121.8915,"2021":68.6318},"Agriculture":{"2001":3.151,"2002":9.1452,"2003":5.4563,"2004":53.8715,"2005":30.3561,"2006":22.9009,"2007":6.5323,"2008":10.9893,"2009":159.7649,"2010":38.7323,"2011":100.4403,"2012":104.3592,"2013":15.3698,"2014":35.8124,"2015":19.6734,"2016":38.1942,"2017":19.2886,"2018":10.5282,"2019":11.2197,"2020":7.9922,"2021":12.1419},"Oil palm plantation":{"2001":389.5357,"2002":222.339,"2003":103.9797,"2004":96.4524,"2005":67.8614,"2006":368.7244,"2007":440.2632,"2008":428.9814,"2009":151.7946,"2010":184.5942,"2011":113.5139,"2012":263.0128,"2013":147.9443,"2014":88.3878,"2015":58.1061,"2016":70.7105,"2017":44.5029,"2018":31.2823,"2019":233.0475,"2020":526.052,"2021":395.9972},"Swamp":{"2001":265.2372,"2002":112.4372,"2003":38.2726,"2004":648.495,"2005":548.2747,"2006":855.4703,"2007":1129.3025,"2008":2086.08,"2009":484.4962,"2010":300.1085,"2011":526.378,"2012":478.8799,"2013":482.4034,"2014":742.3953,"2015":446.518,"2016":539.8938,"2017":620.8959,"2018":204.1215,"2019":105.5176,"2020":122.2736,"2021":197.9767},"Settlements":{"2001":0.1537,"2002":0.9992,"2003":0.0,"2004":0.6918,"2005":0.1537,"2006":1.1529,"2007":1.1529,"2008":0.538,"2009":1.0761,"2010":0.8455,"2011":1.1529,"2012":0.8454,"2013":0.0,"2014":0.6918,"2015":0.1537,"2016":0.2306,"2017":0.3843,"2018":0.0,"2019":0.1537,"2020":1.1529,"2021":1.3067},"Grassland/shrub":{"2001":59.3337,"2002":89.231,"2003":37.5821,"2004":445.7701,"2005":432.4583,"2006":514.3995,"2007":235.9463,"2008":500.7963,"2009":334.6362,"2010":269.6786,"2011":186.2981,"2012":378.8895,"2013":330.4736,"2014":424.3189,"2015":165.2413,"2016":151.5619,"2017":77.7013,"2018":84.6964,"2019":29.59,"2020":91.3842,"2021":
53.8004},"Primary forest":{"2001":41.1934,"2002":30.6653,"2003":13.68,"2004":98.6793,"2005":209.8123,"2006":379.429,"2007":115.8962,"2008":96.2208,"2009":368.2156,"2010":47.8819,"2011":42.0413,"2012":228.795,"2013":26.1305,"2014":255.8481,"2015":270.3755,"2016":823.8133,"2017":81.5399,"2018":47.9595,"2019":64.4845,"2020":22.7495,"2021":71.7856},"Water bodies":{"2001":0.8454,"2002":0.0768,"2003":0.0,"2004":0.1537,"2005":0.0,"2006":0.0,"2007":0.0769,"2008":0.0,"2009":0.0,"2010":0.1537,"2011":0.2306,"2012":0.6916,"2013":0.6917,"2014":0.6148,"2015":0.0,"2016":0.2306,"2017":0.0768,"2018":0.0,"2019":0.0,"2020":0.0,"2021":0.2305},"Mixed tree crops":{"2001":19.1363,"2002":16.9073,"2003":23.9776,"2004":236.7062,"2005":83.3852,"2006":683.7575,"2007":52.029,"2008":78.3891,"2009":95.9108,"2010":65.8624,"2011":136.1808,"2012":219.1809,"2013":39.5784,"2014":106.132,"2015":68.1674,"2016":84.8439,"2017":21.749,"2018":33.1229,"2019":15.6777,"2020":20.7498,"2021":30.1257}} {"Bare land":{"2001":3.8428,"2002":35.2766,"2003":5.3801,"2004":14.4491,"2005":17.6005,"2006":39.8116,"2007":99.1447,"2008":141.5687,"2009":59.9482,"2010":20.7508,"2011":136.3415,"2012":129.3478,"2013":94.2991,"2014":83.0794,"2015":280.0642,"2016":735.1371,"2017":28.9729,"2018":36.1198,"2019":8.3774,"2020":7.1477,"2021":8.0699},"Mining":{"2001":7.301,"2002":2.7666,"2003":5.2258,"2004":11.9889,"2005":15.2172,"2006":9.1456,"2007":7.6082,"2008":34.8914,"2009":16.9072,"2010":8.9918,"2011":12.4502,"2012":29.5112,"2013":1.0759,"2014":16.8304,"2015":2.5362,"2016":1.9982,"2017":1.7676,"2018":0.7685,"2019":0.3074,"2020":0.4611,"2021":1.7676},"Settlement":{"2001":30.2802,"2002":84.4598,"2003":6.7627,"2004":5.226,"2005":1.9982,"2006":15.5239,"2007":5.3029,"2008":146.7178,"2009":9.1456,"2010":5.9944,"2011":8.2999,"2012":20.9038,"2013":6.4557,"2014":10.4519,"2015":14.0641,"2016":20.5962,"2017":9.6834,"2018":5.9946,"2019":5.6871,"2020":7.5318,"2021":7.5314},"Secondary 
forest":{"2001":14.8329,"2002":34.5077,"2003":10.3753,"2004":63.4026,"2005":86.5381,"2006":58.5628,"2007":81.0051,"2008":222.8806,"2009":92.1507,"2010":46.96,"2011":105.6723,"2012":258.1458,"2013":358.5935,"2014":604.2224,"2015":692.3818,"2016":1208.3837,"2017":259.4575,"2018":110.7479,"2019":151.2503,"2020":92.763,"2021":119.8204},"Agriculture":{"2001":87.6883,"2002":42.4224,"2003":18.9819,"2004":289.878,"2005":266.1325,"2006":746.3828,"2007":376.2672,"2008":177.9118,"2009":282.8805,"2010":120.1185,"2011":271.2771,"2012":638.4726,"2013":155.6217,"2014":248.7641,"2015":220.5643,"2016":382.8723,"2017":105.1299,"2018":55.5624,"2019":44.6495,"2020":56.5613,"2021":42.3446},"Swamp":{"2001":110.6023,"2002":161.6235,"2003":30.1276,"2004":349.3156,"2005":345.4651,"2006":346.3096,"2007":162.935,"2008":218.7309,"2009":146.9478,"2010":95.2199,"2011":132.4202,"2012":382.8885,"2013":159.6257,"2014":317.4138,"2015":296.5912,"2016":418.5593,"2017":115.2812,"2018":74.3201,"2019":85.1564,"2020":78.4694,"2021":128.1948},"Grassland/shrub":{"2001":4.9185,"2002":20.2891,"2003":11.7584,"2004":38.7334,"2005":22.748,"2006":135.7978,"2007":15.2937,"2008":74.7011,"2009":35.6594,"2010":20.4429,"2011":50.7993,"2012":105.518,"2013":11.4509,"2014":46.2648,"2015":55.7949,"2016":62.8662,"2017":264.4497,"2018":34.5065,"2019":9.8372,"2020":5.1492,"2021":6.9934},"Estate crop plantation":{"2001":759.8369,"2002":469.6682,"2003":221.0338,"2004":1396.0776,"2005":1569.454,"2006":2808.3496,"2007":2218.0015,"2008":2990.2754,"2009":1359.0495,"2010":854.7606,"2011":983.3553,"2012":1269.5731,"2013":1053.2509,"2014":1138.9796,"2015":466.1251,"2016":509.328,"2017":239.7129,"2018":205.5913,"2019":312.8252,"2020":676.3081,"2021":539.8723},"Body of 
water":{"2001":2.4593,"2002":0.0,"2003":0.538,"2004":0.7685,"2005":0.2306,"2006":2.6132,"2007":3.228,"2008":7.7625,"2009":0.2306,"2010":0.4611,"2011":3.0743,"2012":3.689,"2013":1.3834,"2014":2.7668,"2015":0.8454,"2016":5.0725,"2017":2.3057,"2018":1.9215,"2019":0.6148,"2020":0.3074,"2021":3.228}} {} {"Other Utilization Area":{"2001":712.0267,"2002":482.1867,"2003":221.5682,"2004":1414.5116,"2005":1126.5942,"2006":2837.7298,"2007":1853.8397,"2008":3013.7624,"2009":1165.5631,"2010":833.4598,"2011":1098.1437,"2012":1865.7614,"2013":971.0994,"2014":1259.078,"2015":657.4037,"2016":999.5492,"2017":622.2031,"2018":279.4429,"2019":376.9137,"2020":705.8893,"2021":595.4311},"Production Forest":{"2001":113.1434,"2002":80.4763,"2003":7.6858,"2004":26.0567,"2005":48.7316,"2006":183.5469,"2007":84.3149,"2008":147.2649,"2009":84.4729,"2010":56.57,"2011":110.1392,"2012":156.3372,"2013":49.1136,"2014":173.4782,"2015":154.1063,"2016":334.2621,"2017":35.1256,"2018":10.4532,"2019":17.4472,"2020":24.9028,"2021":17.5241},"Converted Production Forest":{"2001":151.8635,"2002":60.1778,"2003":69.0172,"2004":724.5834,"2005":1148.2139,"2006":1123.3127,"2007":1023.561,"2008":844.268,"2009":747.8878,"2010":275.2161,"2011":481.5731,"2012":804.1156,"2013":820.0067,"2014":1024.9196,"2015":1197.936,"2016":1866.668,"2017":356.367,"2018":233.3308,"2019":223.499,"2020":193.5994,"2021":241.5624},"Sanctuary Reserves/Nature Conservation Area":{"2001":42.2692,"2002":228.1732,"2003":11.3743,"2004":3.9196,"2005":1.614,"2006":15.3711,"2007":4.4576,"2008":2.8437,"2009":4.765,"2010":7.9931,"2011":10.7597,"2012":8.1466,"2013":0.1537,"2014":8.5307,"2015":18.6758,"2016":139.2616,"2017":10.7596,"2018":0.3843,"2019":0.2306,"2020":0.0,"2021":0.0769}} 
{"2001":85.0014,"2002":248.2325,"2003":18.829,"2004":97.8293,"2005":96.2941,"2006":176.9875,"2007":138.7928,"2008":129.4126,"2009":109.4342,"2010":65.0144,"2011":100.5959,"2012":428.132,"2013":566.3779,"2014":467.2467,"2015":304.2577,"2016":712.6515,"2017":145.3232,"2018":56.2574,"2019":82.8502,"2020":54.0272,"2021":24.2097} {} {} {} {} {} {} {} 76338.8266 34513.678 6014.0986 23301.5138 0.0 34530.8164 0.0 125583.7284 6614.0107 31984.9079 {} {} {"Other Utilization Area":81822.5991,"Production Forest":4848.8827,"Converted Production Forest":28600.5448,"Sanctuary Reserves/Nature Conservation Area":6614.0107} {"Rubber plantation":3542.3364,"Secondary forest":24896.0268,"Agriculture":2763.3729,"Oil palm plantation":24398.5485,"Swamp":29043.6031,"Settlements":851.2415,"Grassland/shrub":22752.6953,"Primary forest":8261.1709,"Water bodies":3133.0348,"Mixed tree crops":5941.6982} {"Bare land":2902.1353,"Mining":1392.3433,"Settlement":5798.0268,"Secondary forest":21966.5842,"Agriculture":9963.5593,"Swamp":7625.7847,"Grassland/shrub":2875.049,"Estate crop plantation":69353.0242,"Body of water":3707.2217} 13342.6717 false false false false true true false {"2002":24579.8084,"2003":24451.4643,"2004":24189.9373,"2005":24132.0676,"2006":23677.7243,"2007":23217.4612,"2008":22247.8139,"2009":21697.0839,"2010":21235.5008,"2011":20791.3746,"2012":20560.122,"2013":20057.1275,"2014":19054.2097,"2015":18742.9578,"2016":18210.5965,"2017":17209.7962,"2018":15452.9753,"2019":15189.6774,"2020":15071.8608,"2021":14916.0005} {"2002":31984.9079,"2003":31984.9079,"2004":31984.9079,"2005":31984.9079,"2006":31984.9079,"2007":31984.9079,"2008":31984.9079,"2009":31984.9079,"2010":31984.9079,"2011":31984.9079,"2012":31984.9079,"2013":31984.9079,"2014":31984.9079,"2015":31984.9079,"2016":31984.9079,"2017":31984.9079,"2018":31984.9079,"2019":31984.9079,"2020":31984.9079,"2021":31984.9079} 
{"2002":6614.0107,"2003":6614.0107,"2004":6614.0107,"2005":6614.0107,"2006":6614.0107,"2007":6614.0107,"2008":6614.0107,"2009":6614.0107,"2010":6614.0107,"2011":6614.0107,"2012":6614.0107,"2013":6614.0107,"2014":6614.0107,"2015":6614.0107,"2016":6614.0107,"2017":6614.0107,"2018":6614.0107,"2019":6614.0107,"2020":6614.0107,"2021":6614.0107} {"2002":389.8711,"2003":319.3967,"2004":512.213,"2005":914.6063,"2006":1429.9104,"2007":1520.3773,"2008":1012.313,"2009":905.7094,"2010":675.3788,"2011":734.247,"2012":1505.9123,"2013":1314.1698,"2014":843.6132,"2015":1533.1615,"2016":2757.6213,"2017":2020.1188,"2018":381.1145,"2019":273.6768,"2020":272.6016,"2021":245.5511} {"2002":13363.7925,"2003":13309.3807,"2004":13472.9171,"2005":13694.9388,"2006":14063.9863,"2007":14195.4089,"2008":13789.4768,"2009":13613.3257,"2010":13540.0855,"2011":13478.2203,"2012":13889.142,"2013":13891.8339,"2014":13604.4896,"2015":14052.9405,"2016":14710.2098,"2017":14207.349,"2018":13390.077,"2019":13385.3927,"2020":13385.8532,"2021":13355.5725} {"2002":222.102,"2003":200.4294,"2004":9.6067,"2005":0.4611,"2006":3.7659,"2007":3.8427,"2008":0.1537,"2009":1.2297,"2010":2.8437,"2011":2.9205,"2012":3.2279,"2013":1.9982,"2014":0.3843,"2015":13.7571,"2016":74.1656,"2017":63.0214,"2018":2.2287,"2019":0.0,"2020":0.0,"2021":0.0} {} diff --git a/src/test/resources/palm-32-fcd-output/part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv b/src/test/resources/palm-32-fcd-output/part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv new file mode 100644 index 00000000..5d76cb71 --- /dev/null +++ b/src/test/resources/palm-32-fcd-output/part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv @@ -0,0 +1,2 @@ +list_id location_id status_code location_error tree_cover_loss_total_yearly tree_cover_loss_primary_forest_yearly tree_cover_loss_peat_yearly tree_cover_loss_intact_forest_yearly tree_cover_loss_protected_areas_yearly tree_cover_loss_arg_otbn_yearly tree_cover_loss_sea_landcover_yearly 
tree_cover_loss_idn_landcover_yearly tree_cover_loss_soy_yearly tree_cover_loss_idn_legal_yearly tree_cover_loss_idn_forest_moratorium_yearly tree_cover_loss_prodes_amazon_yearly tree_cover_loss_prodes_cerrado_yearly tree_cover_loss_prodes_amazon_wdpa_yearly tree_cover_loss_prodes_cerrado_wdpa_yearly tree_cover_loss_prodes_amazon_primary_forest_yearly tree_cover_loss_prodes_cerrado_primary_forest_yearly tree_cover_loss_brazil_biomes_yearly tree_cover_extent_total tree_cover_extent_primary_forest tree_cover_extent_protected_areas tree_cover_extent_peat tree_cover_extent_intact_forest natural_habitat_primary natural_habitat_intact_forest total_area protected_areas_area peat_area arg_otbn_area brazil_biomes idn_legal_area sea_landcover_area idn_landcover_area idn_forest_moratorium_area south_america_presence legal_amazon_presence brazil_biomes_presence cerrado_biome_presence southeast_asia_presence indonesia_presence argentina_presence commodity_value_forest_extent commodity_value_peat commodity_value_protected_areas commodity_threat_deforestation commodity_threat_peat commodity_threat_protected_areas commodity_threat_fires +1 31 2 {"2001":1021.7622,"2002":851.014,"2003":310.1835,"2004":2169.8398,"2005":2325.3843,"2006":4162.4968,"2007":2968.7863,"2008":4015.4403,"2009":2002.9194,"2010":1173.7001,"2011":1703.6902,"2012":2838.0498,"2013":1841.7568,"2014":2468.7732,"2015":2028.9672,"2016":3344.8135,"2017":1026.7609,"2018":525.5327,"2019":618.7052,"2020":924.699,"2021":857.8225,"2022":560.0482} {"2001":154.8617,"2002":306.7253,"2003":92.3781,"2004":717.7405,"2005":1202.6952,"2006":1831.5766,"2007":1668.2764,"2008":1753.2317,"2009":797.282,"2010":454.5023,"2011":872.3613,"2012":1251.8543,"2013":1083.6799,"2014":1290.2177,"2015":1360.2574,"2016":2313.5001,"2017":286.2809,"2018":159.8557,"2019":162.3929,"2020":134.2652,"2021":167.4697,"2022":133.6506} 
{"2001":557.4251,"2002":236.2539,"2003":71.8566,"2004":741.25,"2005":957.52,"2006":1229.3335,"2007":1037.5018,"2008":891.235,"2009":486.4665,"2010":363.5759,"2011":411.9212,"2012":1078.9246,"2013":862.5621,"2014":974.783,"2015":942.4571,"2016":1472.8429,"2017":211.3403,"2018":144.7173,"2019":148.7917,"2020":142.3323,"2021":122.7372,"2022":94.914} {} {"2001":42.2692,"2002":228.1732,"2003":11.3743,"2004":3.9196,"2005":1.614,"2006":15.3711,"2007":4.4576,"2008":2.8437,"2009":4.765,"2010":7.9931,"2011":10.7597,"2012":8.1466,"2013":0.1537,"2014":8.5307,"2015":18.6758,"2016":139.2616,"2017":10.7596,"2018":0.3843,"2019":0.2306,"2020":0.0,"2021":0.0769,"2022":0.0} {} {"Rubber plantation":{"2001":3.0745,"2002":16.5256,"2003":36.0493,"2004":66.1791,"2005":73.4812,"2006":25.9797,"2007":5.9184,"2008":56.571,"2009":47.7317,"2010":33.3581,"2011":21.9825,"2012":52.9583,"2013":11.9137,"2014":42.2742,"2015":34.2038,"2016":63.4883,"2017":10.6839,"2018":24.4423,"2019":22.1363,"2020":10.4533,"2021":25.826,"2022":26.1332},"Secondary forest":{"2001":240.1012,"2002":352.6874,"2003":51.186,"2004":522.8408,"2005":879.6014,"2006":1310.6826,"2007":981.6686,"2008":756.8744,"2009":359.2934,"2010":232.485,"2011":575.4717,"2012":1110.4372,"2013":787.2514,"2014":772.2979,"2015":966.528,"2016":1571.8466,"2017":149.9382,"2018":89.3794,"2019":136.8781,"2020":121.8915,"2021":68.6318,"2022":99.681},"Agriculture":{"2001":3.151,"2002":9.1452,"2003":5.4563,"2004":53.8715,"2005":30.3561,"2006":22.9009,"2007":6.5323,"2008":10.9893,"2009":159.7649,"2010":38.7323,"2011":100.4403,"2012":104.3592,"2013":15.3698,"2014":35.8124,"2015":19.6734,"2016":38.1942,"2017":19.2886,"2018":10.5282,"2019":11.2197,"2020":7.9922,"2021":12.1419,"2022":9.2218},"Oil palm 
plantation":{"2001":389.5357,"2002":222.339,"2003":103.9797,"2004":96.4524,"2005":67.8614,"2006":368.7244,"2007":440.2632,"2008":428.9814,"2009":151.7946,"2010":184.5942,"2011":113.5139,"2012":263.0128,"2013":147.9443,"2014":88.3878,"2015":58.1061,"2016":70.7105,"2017":44.5029,"2018":31.2823,"2019":233.0475,"2020":526.052,"2021":395.9972,"2022":105.2237},"Swamp":{"2001":265.2372,"2002":112.4372,"2003":38.2726,"2004":648.495,"2005":548.2747,"2006":855.4703,"2007":1129.3025,"2008":2086.08,"2009":484.4962,"2010":300.1085,"2011":526.378,"2012":478.8799,"2013":482.4034,"2014":742.3953,"2015":446.518,"2016":539.8938,"2017":620.8959,"2018":204.1215,"2019":105.5176,"2020":122.2736,"2021":197.9767,"2022":240.6287},"Settlements":{"2001":0.1537,"2002":0.9992,"2003":0.0,"2004":0.6918,"2005":0.1537,"2006":1.1529,"2007":1.1529,"2008":0.538,"2009":1.0761,"2010":0.8455,"2011":1.1529,"2012":0.8454,"2013":0.0,"2014":0.6918,"2015":0.1537,"2016":0.2306,"2017":0.3843,"2018":0.0,"2019":0.1537,"2020":1.1529,"2021":1.3067,"2022":1.691},"Grassland/shrub":{"2001":59.3337,"2002":89.231,"2003":37.5821,"2004":445.7701,"2005":432.4583,"2006":514.3995,"2007":235.9463,"2008":500.7963,"2009":334.6362,"2010":269.6786,"2011":186.2981,"2012":378.8895,"2013":330.4736,"2014":424.3189,"2015":165.2413,"2016":151.5619,"2017":77.7013,"2018":84.6964,"2019":29.59,"2020":91.3842,"2021":53.8004,"2022":41.7326},"Primary forest":{"2001":41.1934,"2002":30.6653,"2003":13.68,"2004":98.6793,"2005":209.8123,"2006":379.429,"2007":115.8962,"2008":96.2208,"2009":368.2156,"2010":47.8819,"2011":42.0413,"2012":228.795,"2013":26.1305,"2014":255.8481,"2015":270.3755,"2016":823.8133,"2017":81.5399,"2018":47.9595,"2019":64.4845,"2020":22.7495,"2021":71.7856,"2022":5.7642},"Water 
bodies":{"2001":0.8454,"2002":0.0768,"2003":0.0,"2004":0.1537,"2005":0.0,"2006":0.0,"2007":0.0769,"2008":0.0,"2009":0.0,"2010":0.1537,"2011":0.2306,"2012":0.6916,"2013":0.6917,"2014":0.6148,"2015":0.0,"2016":0.2306,"2017":0.0768,"2018":0.0,"2019":0.0,"2020":0.0,"2021":0.2305,"2022":0.1537},"Mixed tree crops":{"2001":19.1363,"2002":16.9073,"2003":23.9776,"2004":236.7062,"2005":83.3852,"2006":683.7575,"2007":52.029,"2008":78.3891,"2009":95.9108,"2010":65.8624,"2011":136.1808,"2012":219.1809,"2013":39.5784,"2014":106.132,"2015":68.1674,"2016":84.8439,"2017":21.749,"2018":33.1229,"2019":15.6777,"2020":20.7498,"2021":30.1257,"2022":29.8185}} {"Bare land":{"2001":3.8428,"2002":35.2766,"2003":5.3801,"2004":14.4491,"2005":17.6005,"2006":39.8116,"2007":99.1447,"2008":141.5687,"2009":59.9482,"2010":20.7508,"2011":136.3415,"2012":129.3478,"2013":94.2991,"2014":83.0794,"2015":280.0642,"2016":735.1371,"2017":28.9729,"2018":36.1198,"2019":8.3774,"2020":7.1477,"2021":8.0699,"2022":2.3824},"Mining":{"2001":7.301,"2002":2.7666,"2003":5.2258,"2004":11.9889,"2005":15.2172,"2006":9.1456,"2007":7.6082,"2008":34.8914,"2009":16.9072,"2010":8.9918,"2011":12.4502,"2012":29.5112,"2013":1.0759,"2014":16.8304,"2015":2.5362,"2016":1.9982,"2017":1.7676,"2018":0.7685,"2019":0.3074,"2020":0.4611,"2021":1.7676,"2022":3.5353},"Settlement":{"2001":30.2802,"2002":84.4598,"2003":6.7627,"2004":5.226,"2005":1.9982,"2006":15.5239,"2007":5.3029,"2008":146.7178,"2009":9.1456,"2010":5.9944,"2011":8.2999,"2012":20.9038,"2013":6.4557,"2014":10.4519,"2015":14.0641,"2016":20.5962,"2017":9.6834,"2018":5.9946,"2019":5.6871,"2020":7.5318,"2021":7.5314,"2022":7.7622},"Secondary 
forest":{"2001":14.8329,"2002":34.5077,"2003":10.3753,"2004":63.4026,"2005":86.5381,"2006":58.5628,"2007":81.0051,"2008":222.8806,"2009":92.1507,"2010":46.96,"2011":105.6723,"2012":258.1458,"2013":358.5935,"2014":604.2224,"2015":692.3818,"2016":1208.3837,"2017":259.4575,"2018":110.7479,"2019":151.2503,"2020":92.763,"2021":119.8204,"2022":109.8252},"Agriculture":{"2001":87.6883,"2002":42.4224,"2003":18.9819,"2004":289.878,"2005":266.1325,"2006":746.3828,"2007":376.2672,"2008":177.9118,"2009":282.8805,"2010":120.1185,"2011":271.2771,"2012":638.4726,"2013":155.6217,"2014":248.7641,"2015":220.5643,"2016":382.8723,"2017":105.1299,"2018":55.5624,"2019":44.6495,"2020":56.5613,"2021":42.3446,"2022":44.4967},"Swamp":{"2001":110.6023,"2002":161.6235,"2003":30.1276,"2004":349.3156,"2005":345.4651,"2006":346.3096,"2007":162.935,"2008":218.7309,"2009":146.9478,"2010":95.2199,"2011":132.4202,"2012":382.8885,"2013":159.6257,"2014":317.4138,"2015":296.5912,"2016":418.5593,"2017":115.2812,"2018":74.3201,"2019":85.1564,"2020":78.4694,"2021":128.1948,"2022":88.1532},"Grassland/shrub":{"2001":4.9185,"2002":20.2891,"2003":11.7584,"2004":38.7334,"2005":22.748,"2006":135.7978,"2007":15.2937,"2008":74.7011,"2009":35.6594,"2010":20.4429,"2011":50.7993,"2012":105.518,"2013":11.4509,"2014":46.2648,"2015":55.7949,"2016":62.8662,"2017":264.4497,"2018":34.5065,"2019":9.8372,"2020":5.1492,"2021":6.9934,"2022":55.8711},"Estate crop plantation":{"2001":759.8369,"2002":469.6682,"2003":221.0338,"2004":1396.0776,"2005":1569.454,"2006":2808.3496,"2007":2218.0015,"2008":2990.2754,"2009":1359.0495,"2010":854.7606,"2011":983.3553,"2012":1269.5731,"2013":1053.2509,"2014":1138.9796,"2015":466.1251,"2016":509.328,"2017":239.7129,"2018":205.5913,"2019":312.8252,"2020":676.3081,"2021":539.8723,"2022":247.7148},"Body of 
water":{"2001":2.4593,"2002":0.0,"2003":0.538,"2004":0.7685,"2005":0.2306,"2006":2.6132,"2007":3.228,"2008":7.7625,"2009":0.2306,"2010":0.4611,"2011":3.0743,"2012":3.689,"2013":1.3834,"2014":2.7668,"2015":0.8454,"2016":5.0725,"2017":2.3057,"2018":1.9215,"2019":0.6148,"2020":0.3074,"2021":3.228,"2022":0.3074}} {} {"Other Utilization Area":{"2001":712.0267,"2002":482.1867,"2003":221.5682,"2004":1414.5116,"2005":1126.5942,"2006":2837.7298,"2007":1853.8397,"2008":3013.7624,"2009":1165.5631,"2010":833.4598,"2011":1098.1437,"2012":1865.7614,"2013":971.0994,"2014":1259.078,"2015":657.4037,"2016":999.5492,"2017":622.2031,"2018":279.4429,"2019":376.9137,"2020":705.8893,"2021":595.4311,"2022":299.8162},"Production Forest":{"2001":113.1434,"2002":80.4763,"2003":7.6858,"2004":26.0567,"2005":48.7316,"2006":183.5469,"2007":84.3149,"2008":147.2649,"2009":84.4729,"2010":56.57,"2011":110.1392,"2012":156.3372,"2013":49.1136,"2014":173.4782,"2015":154.1063,"2016":334.2621,"2017":35.1256,"2018":10.4532,"2019":17.4472,"2020":24.9028,"2021":17.5241,"2022":26.3625},"Converted Production Forest":{"2001":151.8635,"2002":60.1778,"2003":69.0172,"2004":724.5834,"2005":1148.2139,"2006":1123.3127,"2007":1023.561,"2008":844.268,"2009":747.8878,"2010":275.2161,"2011":481.5731,"2012":804.1156,"2013":820.0067,"2014":1024.9196,"2015":1197.936,"2016":1866.668,"2017":356.367,"2018":233.3308,"2019":223.499,"2020":193.5994,"2021":241.5624,"2022":233.5622},"Sanctuary Reserves/Nature Conservation Area":{"2001":42.2692,"2002":228.1732,"2003":11.3743,"2004":3.9196,"2005":1.614,"2006":15.3711,"2007":4.4576,"2008":2.8437,"2009":4.765,"2010":7.9931,"2011":10.7597,"2012":8.1466,"2013":0.1537,"2014":8.5307,"2015":18.6758,"2016":139.2616,"2017":10.7596,"2018":0.3843,"2019":0.2306,"2020":0.0,"2021":0.0769,"2022":0.0}} 
{"2001":85.0014,"2002":248.2325,"2003":18.829,"2004":97.8293,"2005":96.2941,"2006":176.9875,"2007":138.7928,"2008":129.4126,"2009":109.4342,"2010":65.0144,"2011":100.5959,"2012":428.132,"2013":566.3779,"2014":467.2467,"2015":304.2577,"2016":712.6515,"2017":145.3232,"2018":56.2574,"2019":82.8502,"2020":54.0272,"2021":24.2097,"2022":14.7553} {} {} {} {} {} {} {} 76338.8266 34513.678 6014.0986 23301.5138 0.0 34530.8164 0.0 125583.7284 6614.0107 31984.9079 {} {} {"Other Utilization Area":81822.5991,"Production Forest":4848.8827,"Converted Production Forest":28600.5448,"Sanctuary Reserves/Nature Conservation Area":6614.0107} {"Rubber plantation":3542.3364,"Secondary forest":24896.0268,"Agriculture":2763.3729,"Oil palm plantation":24398.5485,"Swamp":29043.6031,"Settlements":851.2415,"Grassland/shrub":22752.6953,"Primary forest":8261.1709,"Water bodies":3133.0348,"Mixed tree crops":5941.6982} {"Bare land":2902.1353,"Mining":1392.3433,"Settlement":5798.0268,"Secondary forest":21966.5842,"Agriculture":9963.5593,"Swamp":7625.7847,"Grassland/shrub":2875.049,"Estate crop plantation":69353.0242,"Body of water":3707.2217} 13342.6717 false false false false true true false {"2002":24579.8084,"2003":24451.4643,"2004":24189.9373,"2005":24132.0676,"2006":23677.7243,"2007":23217.4612,"2008":22247.8139,"2009":21697.0839,"2010":21235.5008,"2011":20791.3746,"2012":20560.122,"2013":20057.1275,"2014":19054.2097,"2015":18742.9578,"2016":18210.5965,"2017":17209.7962,"2018":15452.9753,"2019":15189.6774,"2020":15071.8608,"2021":14916.0005} {"2002":31984.9079,"2003":31984.9079,"2004":31984.9079,"2005":31984.9079,"2006":31984.9079,"2007":31984.9079,"2008":31984.9079,"2009":31984.9079,"2010":31984.9079,"2011":31984.9079,"2012":31984.9079,"2013":31984.9079,"2014":31984.9079,"2015":31984.9079,"2016":31984.9079,"2017":31984.9079,"2018":31984.9079,"2019":31984.9079,"2020":31984.9079,"2021":31984.9079} 
{"2002":6614.0107,"2003":6614.0107,"2004":6614.0107,"2005":6614.0107,"2006":6614.0107,"2007":6614.0107,"2008":6614.0107,"2009":6614.0107,"2010":6614.0107,"2011":6614.0107,"2012":6614.0107,"2013":6614.0107,"2014":6614.0107,"2015":6614.0107,"2016":6614.0107,"2017":6614.0107,"2018":6614.0107,"2019":6614.0107,"2020":6614.0107,"2021":6614.0107} {"2002":389.8711,"2003":319.3967,"2004":512.213,"2005":914.6063,"2006":1429.9104,"2007":1520.3773,"2008":1012.313,"2009":905.7094,"2010":675.3788,"2011":734.247,"2012":1505.9123,"2013":1314.1698,"2014":843.6132,"2015":1533.1615,"2016":2757.6213,"2017":2020.1188,"2018":381.1145,"2019":273.6768,"2020":272.6016,"2021":245.5511} {"2002":13363.7925,"2003":13309.3807,"2004":13472.9171,"2005":13694.9388,"2006":14063.9863,"2007":14195.4089,"2008":13789.4768,"2009":13613.3257,"2010":13540.0855,"2011":13478.2203,"2012":13889.142,"2013":13891.8339,"2014":13604.4896,"2015":14052.9405,"2016":14710.2098,"2017":14207.349,"2018":13390.077,"2019":13385.3927,"2020":13385.8532,"2021":13355.5725} {"2002":222.102,"2003":200.4294,"2004":9.6067,"2005":0.4611,"2006":3.7659,"2007":3.8427,"2008":0.1537,"2009":1.2297,"2010":2.8437,"2011":2.9205,"2012":3.2279,"2013":1.9982,"2014":0.3843,"2015":13.7571,"2016":74.1656,"2017":63.0214,"2018":2.2287,"2019":0.0,"2020":0.0,"2021":0.0} {} From 8255ad22e2efa604676940dc024bcdfd9c7c78f3 Mon Sep 17 00:00:00 2001 From: manukala6 Date: Mon, 18 Sep 2023 14:03:29 -0700 Subject: [PATCH 30/33] GTC-2570 Filter out rows not requiring analysis --- .../org/globalforestwatch/summarystats/afi/AFiAnalysis.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index 742d7cfc..45ee9635 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -48,6 +48,8 @@ object AFiAnalysis 
extends SummaryAnalysis { val summaryDF = AFiAnalysis.aggregateResults( AFiDF .getFeatureDataFrame(summaryRDD, spark) + .filter($"location_id" =!= -2) + .filter(!$"gadm_id".contains("null")) .withColumn( "gadm_id", when(col("location_id") =!= -1|| col("gadm_id").contains("null"), lit("") ).otherwise(col("gadm_id")) ) From 4d8e7e5d2c1f7f4eeb95d64ef712d80ce7e865da Mon Sep 17 00:00:00 2001 From: manukala6 Date: Tue, 19 Sep 2023 08:22:48 -0700 Subject: [PATCH 31/33] GTC-2570 Move filter to features df --- .../org/globalforestwatch/summarystats/afi/AFiAnalysis.scala | 2 -- .../scala/org/globalforestwatch/summarystats/afi/AFiDF.scala | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index 45ee9635..742d7cfc 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -48,8 +48,6 @@ object AFiAnalysis extends SummaryAnalysis { val summaryDF = AFiAnalysis.aggregateResults( AFiDF .getFeatureDataFrame(summaryRDD, spark) - .filter($"location_id" =!= -2) - .filter(!$"gadm_id".contains("null")) .withColumn( "gadm_id", when(col("location_id") =!= -1|| col("gadm_id").contains("null"), lit("") ).otherwise(col("gadm_id")) ) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala index a1b49ac0..cd094ae9 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala @@ -40,5 +40,7 @@ object AFiDF extends SummaryDF { } .toDF("id", "error", "dataGroup", "data") .select($"id.*", $"error.*", $"dataGroup.*", $"data.*") + .filter($"location_id" =!= -2) + .filter(!$"gadm_id".contains("null")) } } From 5d3516fb4f64a0336bfe2301b1dc54b5b4140d48 Mon Sep 
17 00:00:00 2001 From: Dan Scales Date: Wed, 20 Sep 2023 10:08:49 -0700 Subject: [PATCH 32/33] GPTV-2215 Revert pull #175, the PRODES split by Brazilian biome Priorities changed, and this was never implemented on the front-end, so we need to revert these PRODES-split changes. Re-created the test output file needed, given prodes-split removed, but TCL 2022 added. --- src/main/resources/raster-catalog-pro.json | 12 +- .../layers/ProdesAmazonLossYear.scala | 12 - .../layers/ProdesCerradoLossYear.scala | 12 - .../ForestChangeDiagnosticDF.scala | 42 ++- .../ForestChangeDiagnosticData.scala | 97 +++---- .../ForestChangeDiagnosticGridSources.scala | 30 +-- .../ForestChangeDiagnosticRawDataGroup.scala | 247 +++++++++--------- .../ForestChangeDiagnosticSummary.scala | 39 ++- .../ForestChangeDiagnosticTile.scala | 41 +-- ...d5d0-56c1-4840-8264-9b4a48a83e9c-c000.csv} | 4 +- .../ForestChangeDiagnosticAnalysisSpec.scala | 3 +- 11 files changed, 241 insertions(+), 298 deletions(-) delete mode 100644 src/main/scala/org/globalforestwatch/layers/ProdesAmazonLossYear.scala delete mode 100644 src/main/scala/org/globalforestwatch/layers/ProdesCerradoLossYear.scala rename src/test/resources/palm-32-fcd-output/{part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv => part-00000-4a23d5d0-56c1-4840-8264-9b4a48a83e9c-c000.csv} (72%) diff --git a/src/main/resources/raster-catalog-pro.json b/src/main/resources/raster-catalog-pro.json index ee697487..88ed7f20 100755 --- a/src/main/resources/raster-catalog-pro.json +++ b/src/main/resources/raster-catalog-pro.json @@ -197,12 +197,8 @@ "source_uri":"s3://gfw-data-lake/gfw_managed_forests/v202106/raster/epsg-4326/{grid_size}/{row_count}/is/gdal-geotiff/{tile_id}.tif" }, { - "name":"inpe_amazon_prodes", - "source_uri":"s3://gfw-data-lake/inpe_amazon_prodes/v2021/raster/epsg-4326/{grid_size}/{row_count}/year/geotiff/{tile_id}.tif" - }, - { - "name":"inpe_cerrado_prodes", - 
"source_uri":"s3://gfw-data-lake/inpe_cerrado_prodes/v2021/raster/epsg-4326/{grid_size}/{row_count}/year/geotiff/{tile_id}.tif" + "name":"inpe_prodes", + "source_uri":"s3://gfw-data-lake/inpe_prodes/v202107/raster/epsg-4326/{grid_size}/{row_count}/is/gdal-geotiff/{tile_id}.tif" }, { "name":"gfw_mining_concessions", @@ -276,6 +272,10 @@ "name":"umd_soy_planted_area", "source_uri": "s3://gfw-data-lake/umd_soy_planted_area/v2/raster/epsg-4326/{grid_size}/{row_count}/is__year_2021/gdal-geotiff/{tile_id}.tif" }, + { + "name":"inpe_prodes", + "source_uri": "s3://gfw-data-lake/inpe_prodes/v202107/raster/epsg-4326/{grid_size}/{row_count}/is/gdal-geotiff/{tile_id}.tif" + }, { "name":"wwf_eco_regions", "source_uri": "s3://gfw-data-lake/wwf_eco_regions/v2012/raster/epsg-4326/{grid_size}/{row_count}/name/gdal-geotiff/{tile_id}.tif" diff --git a/src/main/scala/org/globalforestwatch/layers/ProdesAmazonLossYear.scala b/src/main/scala/org/globalforestwatch/layers/ProdesAmazonLossYear.scala deleted file mode 100644 index d89deae0..00000000 --- a/src/main/scala/org/globalforestwatch/layers/ProdesAmazonLossYear.scala +++ /dev/null @@ -1,12 +0,0 @@ -package org.globalforestwatch.layers - -import org.globalforestwatch.grids.GridTile - -case class ProdesAmazonLossYear(gridTile: GridTile, kwargs: Map[String, Any]) extends IntegerLayer with OptionalILayer { - val datasetName = "inpe_amazon_prodes" - val uri: String = - uriForGrid(gridTile, kwargs) - - override def lookup(value: Int): Integer = - if (value == 0) null else value -} diff --git a/src/main/scala/org/globalforestwatch/layers/ProdesCerradoLossYear.scala b/src/main/scala/org/globalforestwatch/layers/ProdesCerradoLossYear.scala deleted file mode 100644 index 87a0c5b2..00000000 --- a/src/main/scala/org/globalforestwatch/layers/ProdesCerradoLossYear.scala +++ /dev/null @@ -1,12 +0,0 @@ -package org.globalforestwatch.layers - -import org.globalforestwatch.grids.GridTile - -case class ProdesCerradoLossYear(gridTile: GridTile, kwargs: 
Map[String, Any]) extends IntegerLayer with OptionalILayer { - val datasetName = "inpe_cerrado_prodes" - val uri: String = - uriForGrid(gridTile, kwargs) - - override def lookup(value: Int): Integer = - if (value == 0) null else value -} diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDF.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDF.scala index b00d3406..4ed014b8 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDF.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticDF.scala @@ -29,13 +29,12 @@ object ForestChangeDiagnosticDF extends SummaryDF { throw new IllegalArgumentException(s"Can't produce DataFrame for $id") } - dataRDD - .map { - case Valid(Location(fid, data)) => - (rowId(fid), RowError.empty, data) - case Invalid(Location(fid, err)) => - (rowId(fid), RowError.fromJobError(err), ForestChangeDiagnosticData.empty) - } + dataRDD.map { + case Valid(Location(fid, data)) => + (rowId(fid), RowError.empty, data) + case Invalid(Location(fid, err)) => + (rowId(fid), RowError.fromJobError(err), ForestChangeDiagnosticData.empty) + } .toDF("id", "error", "data") .select($"id.*" :: $"error.*" :: fieldsFromCol($"data", featureFields): _*) } @@ -53,27 +52,25 @@ object ForestChangeDiagnosticDF extends SummaryDF { throw new IllegalArgumentException("Not a CombinedFeatureId") } - dataRDD - .map { - case Valid(Location(fid, data)) => - (rowId(fid), RowError.empty, data) - case Invalid(Location(fid, err)) => - (rowId(fid), RowError.fromJobError(err), ForestChangeDiagnosticData.empty) - } + dataRDD.map { + case Valid(Location(fid, data)) => + (rowId(fid), RowError.empty, data) + case Invalid(Location(fid, err)) => + (rowId(fid), RowError.fromJobError(err), ForestChangeDiagnosticData.empty) + } .toDF("id", "error", "data") .select($"id.*" :: 
$"error.*" :: fieldsFromCol($"data", featureFields) ::: fieldsFromCol($"data", gridFields): _*) } def readIntermidateRDD( sources: NonEmptyList[String], - spark: SparkSession + spark: SparkSession, ): RDD[ValidatedLocation[ForestChangeDiagnosticData]] = { val df = FeatureDF(sources, GfwProFeature, FeatureFilter.empty, spark) val ds = df.select( colsFor[RowGridId].as[RowGridId], colsFor[RowError].as[RowError], - colsFor[ForestChangeDiagnosticData].as[ForestChangeDiagnosticData] - ) + colsFor[ForestChangeDiagnosticData].as[ForestChangeDiagnosticData]) ds.rdd.map { case (id, error, data) => if (error.status_code == 2) Valid(Location(id.toFeatureID, data)) @@ -104,12 +101,9 @@ object ForestChangeDiagnosticDF extends SummaryDF { "tree_cover_loss_soy_yearly", // treeCoverLossSoyPlanedAreasYearly "tree_cover_loss_idn_legal_yearly", // treeCoverLossIDNForestAreaYearly "tree_cover_loss_idn_forest_moratorium_yearly", // treeCoverLossIDNForestMoratoriumYearly - "tree_cover_loss_prodes_amazon_yearly", // prodesLossAmazonYearly - "tree_cover_loss_prodes_cerrado_yearly", // prodesLossCerradoYearly - "tree_cover_loss_prodes_amazon_wdpa_yearly", // prodesLossAmazonProtectedAreasYearly - "tree_cover_loss_prodes_cerrado_wdpa_yearly", // prodesLossCerradoProtectedAreasYearly - "tree_cover_loss_prodes_amazon_primary_forest_yearly", // prodesLossProdesAmazonPrimaryForestYearly - "tree_cover_loss_prodes_cerrado_primary_forest_yearly", // prodesLossProdesCerradoPrimaryForestYearly + "tree_cover_loss_prodes_yearly", // prodesLossYearly + "tree_cover_loss_prodes_wdpa_yearly", // prodesLossProtectedAreasYearly + "tree_cover_loss_prodes_primary_forest_yearly", // prodesLossProdesPrimaryForestYearly "tree_cover_loss_brazil_biomes_yearly", // treeCoverLossBRABiomesYearly "tree_cover_extent_total", // treeCoverExtent "tree_cover_extent_primary_forest", // treeCoverExtentPrimaryForest @@ -155,4 +149,4 @@ object ForestChangeDiagnosticDF extends SummaryDF { "plantation_in_protected_areas_area" 
//plantationInProtectedAreasArea ) -} +} \ No newline at end of file diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticData.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticData.scala index 29be9c73..881a302e 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticData.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticData.scala @@ -27,13 +27,10 @@ case class ForestChangeDiagnosticData( /** treeCoverLossIDNForestAreaYearly */ tree_cover_loss_idn_legal_yearly: ForestChangeDiagnosticDataLossYearlyCategory, tree_cover_loss_idn_forest_moratorium_yearly: ForestChangeDiagnosticDataLossYearly, - tree_cover_loss_prodes_amazon_yearly: ForestChangeDiagnosticDataLossYearly, - tree_cover_loss_prodes_cerrado_yearly: ForestChangeDiagnosticDataLossYearly, + tree_cover_loss_prodes_yearly: ForestChangeDiagnosticDataLossYearly, /** prodesLossProtectedAreasYearly */ - tree_cover_loss_prodes_amazon_wdpa_yearly: ForestChangeDiagnosticDataLossYearly, - tree_cover_loss_prodes_cerrado_wdpa_yearly: ForestChangeDiagnosticDataLossYearly, - tree_cover_loss_prodes_amazon_primary_forest_yearly: ForestChangeDiagnosticDataLossYearly, - tree_cover_loss_prodes_cerrado_primary_forest_yearly: ForestChangeDiagnosticDataLossYearly, + tree_cover_loss_prodes_wdpa_yearly: ForestChangeDiagnosticDataLossYearly, + tree_cover_loss_prodes_primary_forest_yearly: ForestChangeDiagnosticDataLossYearly, tree_cover_loss_brazil_biomes_yearly: ForestChangeDiagnosticDataLossYearlyCategory, tree_cover_extent_total: ForestChangeDiagnosticDataDouble, tree_cover_extent_primary_forest: ForestChangeDiagnosticDataDouble, @@ -119,19 +116,12 @@ case class ForestChangeDiagnosticData( tree_cover_loss_idn_forest_moratorium_yearly.merge( other.tree_cover_loss_idn_forest_moratorium_yearly ), - 
tree_cover_loss_prodes_amazon_yearly.merge(other.tree_cover_loss_prodes_amazon_yearly), - tree_cover_loss_prodes_cerrado_yearly.merge(other.tree_cover_loss_prodes_cerrado_yearly), - tree_cover_loss_prodes_amazon_wdpa_yearly.merge( - other.tree_cover_loss_prodes_amazon_wdpa_yearly + tree_cover_loss_prodes_yearly.merge(other.tree_cover_loss_prodes_yearly), + tree_cover_loss_prodes_wdpa_yearly.merge( + other.tree_cover_loss_prodes_wdpa_yearly ), - tree_cover_loss_prodes_cerrado_wdpa_yearly.merge( - other.tree_cover_loss_prodes_cerrado_wdpa_yearly - ), - tree_cover_loss_prodes_amazon_primary_forest_yearly.merge( - other.tree_cover_loss_prodes_amazon_primary_forest_yearly - ), - tree_cover_loss_prodes_cerrado_primary_forest_yearly.merge( - other.tree_cover_loss_prodes_cerrado_primary_forest_yearly + tree_cover_loss_prodes_primary_forest_yearly.merge( + other.tree_cover_loss_prodes_primary_forest_yearly ), tree_cover_loss_brazil_biomes_yearly.merge(other.tree_cover_loss_brazil_biomes_yearly), tree_cover_extent_total.merge(other.tree_cover_extent_total), @@ -181,27 +171,25 @@ case class ForestChangeDiagnosticData( ) } - /** @see - * https://docs.google.com/presentation/d/1nAq4mFNkv1q5vFvvXWReuLr4Znvr-1q-BDi6pl_5zTU/edit#slide=id.p + /** + * @see https://docs.google.com/presentation/d/1nAq4mFNkv1q5vFvvXWReuLr4Znvr-1q-BDi6pl_5zTU/edit#slide=id.p */ def withUpdatedCommodityRisk(): ForestChangeDiagnosticData = { /* Exclude the last year, limit data to 2021 to sync with palm risk tool: commodity_threat_deforestation, commodity_threat_peat, commodity_threat_protected_areas use year n and year n-1. Including information from the current year would under-represent these values as it's in progress. 
- */ + */ val minLossYear = ForestChangeDiagnosticDataLossYearly.prefilled.value.keys.min val maxLossYear = 2021 val years: List[Int] = List.range(minLossYear + 1, maxLossYear + 1) val forestValueIndicator: ForestChangeDiagnosticDataValueYearly = - ForestChangeDiagnosticDataValueYearly - .fill( - filtered_tree_cover_extent.value, - filtered_tree_cover_loss_yearly.value, - 2 - ) - .limitToMaxYear(maxLossYear) + ForestChangeDiagnosticDataValueYearly.fill( + filtered_tree_cover_extent.value, + filtered_tree_cover_loss_yearly.value, + 2 + ).limitToMaxYear(maxLossYear) val peatValueIndicator: ForestChangeDiagnosticDataValueYearly = ForestChangeDiagnosticDataValueYearly.fill(peat_area.value).limitToMaxYear(maxLossYear) @@ -212,22 +200,21 @@ case class ForestChangeDiagnosticData( val deforestationThreatIndicator: ForestChangeDiagnosticDataLossYearly = ForestChangeDiagnosticDataLossYearly( SortedMap( - years.map(year => - ( - year, { + years.map( + year => + (year, { // Somehow the compiler cannot infer the types correctly // I hence declare them here explicitly to help him out. val thisYearLoss: Double = - filtered_tree_cover_loss_yearly.value - .getOrElse(year, 0) + filtered_tree_cover_loss_yearly.value + .getOrElse(year, 0) val lastYearLoss: Double = filtered_tree_cover_loss_yearly.value .getOrElse(year - 1, 0) thisYearLoss + lastYearLoss - } - ) + }) ): _* ) ).limitToMaxYear(maxLossYear) @@ -235,14 +222,14 @@ case class ForestChangeDiagnosticData( val peatThreatIndicator: ForestChangeDiagnosticDataLossYearly = ForestChangeDiagnosticDataLossYearly( SortedMap( - years.map(year => - ( - year, { + years.map( + year => + (year, { // Somehow the compiler cannot infer the types correctly // I hence declare them here explicitly to help him out. 
val thisYearPeatLoss: Double = - filtered_tree_cover_loss_peat_yearly.value - .getOrElse(year, 0) + filtered_tree_cover_loss_peat_yearly.value + .getOrElse(year, 0) val lastYearPeatLoss: Double = filtered_tree_cover_loss_peat_yearly.value @@ -250,8 +237,7 @@ case class ForestChangeDiagnosticData( thisYearPeatLoss + lastYearPeatLoss + plantation_on_peat_area.value - } - ) + }) ): _* ) ).limitToMaxYear(maxLossYear) @@ -259,22 +245,21 @@ case class ForestChangeDiagnosticData( val protectedAreaThreatIndicator: ForestChangeDiagnosticDataLossYearly = ForestChangeDiagnosticDataLossYearly( SortedMap( - years.map(year => - ( - year, { + years.map( + year => + (year, { // Somehow the compiler cannot infer the types correctly // I hence declare them here explicitly to help him out. val thisYearProtectedAreaLoss: Double = - filtered_tree_cover_loss_protected_areas_yearly.value - .getOrElse(year, 0) + filtered_tree_cover_loss_protected_areas_yearly.value + .getOrElse(year, 0) val lastYearProtectedAreaLoss: Double = filtered_tree_cover_loss_protected_areas_yearly.value .getOrElse(year - 1, 0) thisYearProtectedAreaLoss + lastYearProtectedAreaLoss + plantation_in_protected_areas_area.value - } - ) + }) ): _* ) ).limitToMaxYear(maxLossYear) @@ -285,8 +270,7 @@ case class ForestChangeDiagnosticData( commodity_value_protected_areas = protectedAreaValueIndicator, commodity_threat_deforestation = deforestationThreatIndicator, commodity_threat_peat = peatThreatIndicator, - commodity_threat_protected_areas = protectedAreaThreatIndicator - ) + commodity_threat_protected_areas = protectedAreaThreatIndicator) } } @@ -310,9 +294,6 @@ object ForestChangeDiagnosticData { ForestChangeDiagnosticDataLossYearly.empty, ForestChangeDiagnosticDataLossYearly.empty, ForestChangeDiagnosticDataLossYearly.empty, - ForestChangeDiagnosticDataLossYearly.empty, - ForestChangeDiagnosticDataLossYearly.empty, - ForestChangeDiagnosticDataLossYearly.empty, ForestChangeDiagnosticDataLossYearlyCategory.empty, 
ForestChangeDiagnosticDataDouble.empty, ForestChangeDiagnosticDataDouble.empty, @@ -351,17 +332,17 @@ object ForestChangeDiagnosticData { ForestChangeDiagnosticDataLossYearly.empty, ForestChangeDiagnosticDataLossYearly.empty, ForestChangeDiagnosticDataLossYearly.empty, - ForestChangeDiagnosticDataLossYearly.empty + ForestChangeDiagnosticDataLossYearly.empty, ) implicit val lossDataSemigroup: Semigroup[ForestChangeDiagnosticData] = new Semigroup[ForestChangeDiagnosticData] { - def combine(x: ForestChangeDiagnosticData, y: ForestChangeDiagnosticData): ForestChangeDiagnosticData = + def combine(x: ForestChangeDiagnosticData, + y: ForestChangeDiagnosticData): ForestChangeDiagnosticData = x.merge(y) } implicit def dataExpressionEncoder: ExpressionEncoder[ForestChangeDiagnosticData] = - frameless - .TypedExpressionEncoder[ForestChangeDiagnosticData] + frameless.TypedExpressionEncoder[ForestChangeDiagnosticData] .asInstanceOf[ExpressionEncoder[ForestChangeDiagnosticData]] } diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticGridSources.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticGridSources.scala index 6f359f6c..0fdc566c 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticGridSources.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticGridSources.scala @@ -6,10 +6,11 @@ import geotrellis.raster.Raster import org.globalforestwatch.grids.{GridSources, GridTile} import org.globalforestwatch.layers._ -/** @param gridTile - * top left corner, padded from east ex: "10N_010E" +/** + * @param gridTile top left corner, padded from east ex: "10N_010E" */ -case class ForestChangeDiagnosticGridSources(gridTile: GridTile, kwargs: Map[String, Any]) extends GridSources { +case class ForestChangeDiagnosticGridSources(gridTile: GridTile, kwargs: Map[String, 
Any]) + extends GridSources { val treeCoverLoss: TreeCoverLoss = TreeCoverLoss(gridTile, kwargs) val treeCoverDensity2000: TreeCoverDensityPercent2000 = TreeCoverDensityPercent2000(gridTile, kwargs) @@ -22,17 +23,17 @@ case class ForestChangeDiagnosticGridSources(gridTile: GridTile, kwargs: Map[Str val isSoyPlantedArea: SoyPlantedAreas = SoyPlantedAreas(gridTile, kwargs) val idnForestArea: IndonesiaForestArea = IndonesiaForestArea(gridTile, kwargs) val isIDNForestMoratorium: IndonesiaForestMoratorium = IndonesiaForestMoratorium(gridTile, kwargs) - val prodesAmazonLossYear: ProdesAmazonLossYear = ProdesAmazonLossYear(gridTile, kwargs) - val prodesCerradoLossYear: ProdesCerradoLossYear = ProdesCerradoLossYear(gridTile, kwargs) + val prodesLossYear: ProdesLossYear = ProdesLossYear(gridTile, kwargs) val braBiomes: BrazilBiomes = BrazilBiomes(gridTile, kwargs) val isPlantation: PlantedForestsBool = PlantedForestsBool(gridTile, kwargs) val gfwProCoverage: GFWProCoverage = GFWProCoverage(gridTile, kwargs) val argOTBN: ArgOTBN = ArgOTBN(gridTile, kwargs) + def readWindow( - windowKey: SpatialKey, - windowLayout: LayoutDefinition - ): Either[Throwable, Raster[ForestChangeDiagnosticTile]] = { + windowKey: SpatialKey, + windowLayout: LayoutDefinition + ): Either[Throwable, Raster[ForestChangeDiagnosticTile]] = { for { // Failure for any of these reads will result in function returning Left[Throwable] @@ -58,13 +59,13 @@ case class ForestChangeDiagnosticGridSources(gridTile: GridTile, kwargs: Map[Str val isSoyPlantedAreasTile = isSoyPlantedArea.fetchWindow(windowKey, windowLayout) val idnForestAreaTile = idnForestArea.fetchWindow(windowKey, windowLayout) val isINDForestMoratoriumTile = isIDNForestMoratorium.fetchWindow(windowKey, windowLayout) - val prodesAmazonLossYearTile = prodesAmazonLossYear.fetchWindow(windowKey, windowLayout) - val prodesCerradoLossYearTile = prodesCerradoLossYear.fetchWindow(windowKey, windowLayout) + val prodesLossYearTile = 
prodesLossYear.fetchWindow(windowKey, windowLayout) val braBiomesTile = braBiomes.fetchWindow(windowKey, windowLayout) val isPlantationTile = isPlantation.fetchWindow(windowKey, windowLayout) val gfwProCoverageTile = gfwProCoverage.fetchWindow(windowKey, windowLayout) val argOTBNTile = argOTBN.fetchWindow(windowKey, windowLayout) + val tile = ForestChangeDiagnosticTile( lossTile, tcd2000Tile, @@ -77,8 +78,7 @@ case class ForestChangeDiagnosticGridSources(gridTile: GridTile, kwargs: Map[Str isSoyPlantedAreasTile, idnForestAreaTile, isINDForestMoratoriumTile, - prodesAmazonLossYearTile, - prodesCerradoLossYearTile, + prodesLossYearTile, braBiomesTile, isPlantationTile, gfwProCoverageTile, @@ -98,9 +98,9 @@ object ForestChangeDiagnosticGridSources { .empty[String, ForestChangeDiagnosticGridSources] def getCachedSources( - gridTile: GridTile, - kwargs: Map[String, Any] - ): ForestChangeDiagnosticGridSources = { + gridTile: GridTile, + kwargs: Map[String, Any] + ): ForestChangeDiagnosticGridSources = { cache.getOrElseUpdate( gridTile.tileId, diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticRawDataGroup.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticRawDataGroup.scala index bd4676a2..93e95ab7 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticRawDataGroup.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticRawDataGroup.scala @@ -1,34 +1,30 @@ package org.globalforestwatch.summarystats.forest_change_diagnostic -case class ForestChangeDiagnosticRawDataGroup( - umdTreeCoverLossYear: Int, - isUMDLoss: Boolean, - prodesAmazonLossYear: Int, - prodesCerradoLossYear: Int, - isProdesAmazonLoss: Boolean, - isProdesCerradoLoss: Boolean, - isTreeCoverExtent30: Boolean, - isTreeCoverExtent90: Boolean, - isPrimaryForest: Boolean, - isPeatlands: Boolean, 
- isIntactForestLandscapes2000: Boolean, - isProtectedArea: Boolean, - seAsiaLandCover: String, - idnLandCover: String, - isSoyPlantedAreas: Boolean, - idnForestArea: String, - isIdnForestMoratorium: Boolean, - braBiomes: String, - isPlantation: Boolean, - argOTBN: String, - southAmericaPresence: Boolean, - legalAmazonPresence: Boolean, - braBiomesPresence: Boolean, - cerradoBiomesPresence: Boolean, - seAsiaPresence: Boolean, - idnPresence: Boolean, - argPresence: Boolean -) { +case class ForestChangeDiagnosticRawDataGroup(umdTreeCoverLossYear: Int, + isUMDLoss: Boolean, + prodesLossYear: Int, + isProdesLoss: Boolean, + isTreeCoverExtent30: Boolean, + isTreeCoverExtent90: Boolean, + isPrimaryForest: Boolean, + isPeatlands: Boolean, + isIntactForestLandscapes2000: Boolean, + isProtectedArea: Boolean, + seAsiaLandCover: String, + idnLandCover: String, + isSoyPlantedAreas: Boolean, + idnForestArea: String, + isIdnForestMoratorium: Boolean, + braBiomes: String, + isPlantation: Boolean, + argOTBN: String, + southAmericaPresence: Boolean, + legalAmazonPresence: Boolean, + braBiomesPresence: Boolean, + cerradoBiomesPresence: Boolean, + seAsiaPresence: Boolean, + idnPresence: Boolean, + argPresence: Boolean) { /** Produce a partial ForestChangeDiagnosticData only for the loss year in this data group */ def toForestChangeDiagnosticData(totalArea: Double): ForestChangeDiagnosticData = ForestChangeDiagnosticData( @@ -42,96 +38,93 @@ case class ForestChangeDiagnosticRawDataGroup( totalArea, isUMDLoss && isTreeCoverExtent90 ), - tree_cover_loss_primary_forest_yearly = ForestChangeDiagnosticDataLossYearly.fill( - umdTreeCoverLossYear, - totalArea, - isPrimaryForest && isUMDLoss - ), + tree_cover_loss_primary_forest_yearly = + ForestChangeDiagnosticDataLossYearly.fill( + umdTreeCoverLossYear, + totalArea, + isPrimaryForest && isUMDLoss + ), tree_cover_loss_peat_yearly = ForestChangeDiagnosticDataLossYearly.fill( umdTreeCoverLossYear, totalArea, isPeatlands && isUMDLoss ), - 
tree_cover_loss_intact_forest_yearly = ForestChangeDiagnosticDataLossYearly.fill( - umdTreeCoverLossYear, - totalArea, - isIntactForestLandscapes2000 && isUMDLoss - ), - tree_cover_loss_protected_areas_yearly = ForestChangeDiagnosticDataLossYearly.fill( - umdTreeCoverLossYear, - totalArea, - isProtectedArea && isUMDLoss - ), - tree_cover_loss_arg_otbn_yearly = ForestChangeDiagnosticDataLossYearlyCategory.fill( - argOTBN, - umdTreeCoverLossYear, - totalArea, - include = isUMDLoss - ), - tree_cover_loss_sea_landcover_yearly = ForestChangeDiagnosticDataLossYearlyCategory.fill( - seAsiaLandCover, - umdTreeCoverLossYear, - totalArea, - include = isUMDLoss - ), - tree_cover_loss_idn_landcover_yearly = ForestChangeDiagnosticDataLossYearlyCategory.fill( - idnLandCover, - umdTreeCoverLossYear, - totalArea, - include = isUMDLoss - ), - tree_cover_loss_soy_yearly = ForestChangeDiagnosticDataLossYearly.fill( - umdTreeCoverLossYear, - totalArea, - isSoyPlantedAreas && isUMDLoss - ), - tree_cover_loss_idn_legal_yearly = ForestChangeDiagnosticDataLossYearlyCategory.fill( - idnForestArea, + tree_cover_loss_intact_forest_yearly = + ForestChangeDiagnosticDataLossYearly.fill( + umdTreeCoverLossYear, + totalArea, + isIntactForestLandscapes2000 && isUMDLoss + ), + tree_cover_loss_protected_areas_yearly = + ForestChangeDiagnosticDataLossYearly.fill( + umdTreeCoverLossYear, + totalArea, + isProtectedArea && isUMDLoss + ), + tree_cover_loss_arg_otbn_yearly = + ForestChangeDiagnosticDataLossYearlyCategory.fill( + argOTBN, umdTreeCoverLossYear, totalArea, include = isUMDLoss ), - tree_cover_loss_idn_forest_moratorium_yearly = ForestChangeDiagnosticDataLossYearly.fill( - umdTreeCoverLossYear, - totalArea, - isIdnForestMoratorium && isUMDLoss - ), - tree_cover_loss_prodes_amazon_yearly = ForestChangeDiagnosticDataLossYearly.fill( - prodesAmazonLossYear, - totalArea, - isProdesAmazonLoss - ), - tree_cover_loss_prodes_cerrado_yearly = ForestChangeDiagnosticDataLossYearly.fill( - 
prodesCerradoLossYear, - totalArea, - isProdesCerradoLoss - ), - tree_cover_loss_prodes_amazon_wdpa_yearly = ForestChangeDiagnosticDataLossYearly.fill( - prodesAmazonLossYear, - totalArea, - isProdesAmazonLoss && isProtectedArea - ), - tree_cover_loss_prodes_cerrado_wdpa_yearly = ForestChangeDiagnosticDataLossYearly.fill( - prodesCerradoLossYear, - totalArea, - isProdesCerradoLoss && isProtectedArea - ), - tree_cover_loss_prodes_amazon_primary_forest_yearly = ForestChangeDiagnosticDataLossYearly.fill( - prodesAmazonLossYear, - totalArea, - isProdesAmazonLoss && isPrimaryForest - ), - tree_cover_loss_prodes_cerrado_primary_forest_yearly = ForestChangeDiagnosticDataLossYearly.fill( - prodesCerradoLossYear, - totalArea, - isProdesCerradoLoss && isPrimaryForest - ), - tree_cover_loss_brazil_biomes_yearly = ForestChangeDiagnosticDataLossYearlyCategory.fill( - braBiomes, - umdTreeCoverLossYear, + tree_cover_loss_sea_landcover_yearly = + ForestChangeDiagnosticDataLossYearlyCategory.fill( + seAsiaLandCover, + umdTreeCoverLossYear, + totalArea, + include = isUMDLoss + ), + tree_cover_loss_idn_landcover_yearly = + ForestChangeDiagnosticDataLossYearlyCategory.fill( + idnLandCover, + umdTreeCoverLossYear, + totalArea, + include = isUMDLoss + ), + tree_cover_loss_soy_yearly = + ForestChangeDiagnosticDataLossYearly.fill( + umdTreeCoverLossYear, + totalArea, + isSoyPlantedAreas && isUMDLoss + ), + tree_cover_loss_idn_legal_yearly = + ForestChangeDiagnosticDataLossYearlyCategory.fill( + idnForestArea, + umdTreeCoverLossYear, + totalArea, + include = isUMDLoss + ), + tree_cover_loss_idn_forest_moratorium_yearly = + ForestChangeDiagnosticDataLossYearly.fill( + umdTreeCoverLossYear, + totalArea, + isIdnForestMoratorium && isUMDLoss + ), + tree_cover_loss_prodes_yearly = ForestChangeDiagnosticDataLossYearly.fill( + prodesLossYear, totalArea, - include = isUMDLoss + isProdesLoss ), + tree_cover_loss_prodes_wdpa_yearly = + ForestChangeDiagnosticDataLossYearly.fill( + prodesLossYear, + 
totalArea, + isProdesLoss && isProtectedArea + ), + tree_cover_loss_prodes_primary_forest_yearly = + ForestChangeDiagnosticDataLossYearly.fill( + prodesLossYear, + totalArea, + isProdesLoss && isPrimaryForest + ), + tree_cover_loss_brazil_biomes_yearly = + ForestChangeDiagnosticDataLossYearlyCategory.fill( + braBiomes, + umdTreeCoverLossYear, + totalArea, + include = isUMDLoss + ), tree_cover_extent_total = ForestChangeDiagnosticDataDouble .fill(totalArea, isTreeCoverExtent30), tree_cover_extent_primary_forest = ForestChangeDiagnosticDataDouble.fill( @@ -179,30 +172,36 @@ case class ForestChangeDiagnosticRawDataGroup( .fill(braBiomesPresence), cerrado_biome_presence = ForestChangeDiagnosticDataBoolean .fill(cerradoBiomesPresence), - southeast_asia_presence = ForestChangeDiagnosticDataBoolean.fill(seAsiaPresence), - indonesia_presence = ForestChangeDiagnosticDataBoolean.fill(idnPresence), - argentina_presence = ForestChangeDiagnosticDataBoolean.fill(argPresence), + southeast_asia_presence = + ForestChangeDiagnosticDataBoolean.fill(seAsiaPresence), + indonesia_presence = + ForestChangeDiagnosticDataBoolean.fill(idnPresence), + argentina_presence = + ForestChangeDiagnosticDataBoolean.fill(argPresence), filtered_tree_cover_extent = ForestChangeDiagnosticDataDouble .fill( totalArea, isTreeCoverExtent90 && !isPlantation ), - filtered_tree_cover_extent_yearly = ForestChangeDiagnosticDataValueYearly.empty, + filtered_tree_cover_extent_yearly = + ForestChangeDiagnosticDataValueYearly.empty, filtered_tree_cover_loss_yearly = ForestChangeDiagnosticDataLossYearly.fill( umdTreeCoverLossYear, totalArea, isUMDLoss && isTreeCoverExtent90 && !isPlantation ), - filtered_tree_cover_loss_peat_yearly = ForestChangeDiagnosticDataLossYearly.fill( - umdTreeCoverLossYear, - totalArea, - isUMDLoss && isTreeCoverExtent90 && !isPlantation && isPeatlands - ), - filtered_tree_cover_loss_protected_areas_yearly = ForestChangeDiagnosticDataLossYearly.fill( - umdTreeCoverLossYear, - totalArea, - 
isUMDLoss && isTreeCoverExtent90 && !isPlantation && isProtectedArea - ), + filtered_tree_cover_loss_peat_yearly = + ForestChangeDiagnosticDataLossYearly.fill( + umdTreeCoverLossYear, + totalArea, + isUMDLoss && isTreeCoverExtent90 && !isPlantation && isPeatlands + ), + filtered_tree_cover_loss_protected_areas_yearly = + ForestChangeDiagnosticDataLossYearly.fill( + umdTreeCoverLossYear, + totalArea, + isUMDLoss && isTreeCoverExtent90 && !isPlantation && isProtectedArea + ), plantation_area = ForestChangeDiagnosticDataDouble .fill(totalArea, isPlantation), plantation_on_peat_area = ForestChangeDiagnosticDataDouble @@ -223,4 +222,6 @@ case class ForestChangeDiagnosticRawDataGroup( commodity_threat_protected_areas = ForestChangeDiagnosticDataLossYearly.empty, commodity_threat_fires = ForestChangeDiagnosticDataLossYearly.empty ) -} + } + + diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticSummary.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticSummary.scala index cd11fed3..76f86e5d 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticSummary.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticSummary.scala @@ -8,8 +8,9 @@ import org.globalforestwatch.util.Geodesy /** LossData Summary by year */ case class ForestChangeDiagnosticSummary( - stats: Map[ForestChangeDiagnosticRawDataGroup, ForestChangeDiagnosticRawData] = Map.empty -) extends Summary[ForestChangeDiagnosticSummary] { + stats: Map[ForestChangeDiagnosticRawDataGroup, + ForestChangeDiagnosticRawData] = Map.empty + ) extends Summary[ForestChangeDiagnosticSummary] { /** Combine two Maps and combine their LossData when a year is present in both */ def merge( @@ -38,23 +39,23 @@ object ForestChangeDiagnosticSummary { def getGridVisitor( kwargs: Map[String, Any] - ): 
GridVisitor[Raster[ForestChangeDiagnosticTile], ForestChangeDiagnosticSummary] = + ): GridVisitor[Raster[ForestChangeDiagnosticTile], + ForestChangeDiagnosticSummary] = new GridVisitor[Raster[ForestChangeDiagnosticTile], ForestChangeDiagnosticSummary] { private var acc: ForestChangeDiagnosticSummary = new ForestChangeDiagnosticSummary() def result: ForestChangeDiagnosticSummary = acc - def visit(raster: Raster[ForestChangeDiagnosticTile], col: Int, row: Int): Unit = { + def visit(raster: Raster[ForestChangeDiagnosticTile], + col: Int, + row: Int): Unit = { // This is a pixel by pixel operation // pixel Area val lat: Double = raster.rasterExtent.gridRowToMap(row) - val area: Double = Geodesy.pixelArea( - lat, - raster.cellSize - ) // uses Pixel's center coordiate. +- raster.cellSize.height/2 doesn't make much of a difference + val area: Double = Geodesy.pixelArea(lat, raster.cellSize) // uses Pixel's center coordiate. +- raster.cellSize.height/2 doesn't make much of a difference val areaHa = area / 10000.0 // input layers @@ -75,16 +76,8 @@ object ForestChangeDiagnosticSummary { val isIntactForestLandscapes2000: Boolean = raster.tile.isIntactForestLandscapes2000.getData(col, row) val wdpa: String = raster.tile.wdpaProtectedAreas.getData(col, row) - val prodesAmazonLossYear: Int = { - val loss = raster.tile.prodesAmazonLossYear.getData(col, row) - if (loss != null) { - loss.toInt - } else { - 0 - } - } - val prodesCerradoLossYear: Int = { - val loss = raster.tile.prodesCerradoLossYear.getData(col, row) + val prodesLossYear: Int = { + val loss = raster.tile.prodesLossYear.getData(col, row) if (loss != null) { loss.toInt } else { @@ -110,8 +103,8 @@ object ForestChangeDiagnosticSummary { val isTreeCoverExtent90: Boolean = tcd2000 > 90 val isUMDLoss: Boolean = isTreeCoverExtent30 && umdTreeCoverLossYear > 0 val isProtectedArea: Boolean = wdpa != "" - val isProdesAmazonLoss: Boolean = prodesAmazonLossYear > 0 - val isProdesCerradoLoss: Boolean = prodesCerradoLossYear > 0 
+ val isProdesLoss: Boolean = prodesLossYear > 0 + val southAmericaPresence = gfwProCoverage.getOrElse("South America", false) val legalAmazonPresence = @@ -126,10 +119,8 @@ object ForestChangeDiagnosticSummary { val groupKey = ForestChangeDiagnosticRawDataGroup( umdTreeCoverLossYear, isUMDLoss, - prodesAmazonLossYear, - prodesCerradoLossYear, - isProdesAmazonLoss, - isProdesCerradoLoss, + prodesLossYear, + isProdesLoss, isTreeCoverExtent30, isTreeCoverExtent90, isPrimaryForest, diff --git a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticTile.scala b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticTile.scala index 1051bbaa..26649710 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticTile.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticTile.scala @@ -3,28 +3,29 @@ package org.globalforestwatch.summarystats.forest_change_diagnostic import geotrellis.raster.{CellGrid, CellType} import org.globalforestwatch.layers._ -/** Tile-like structure to hold tiles from datasets required for our summary. We can not use GeoTrellis MultibandTile because it requires - * all bands share a CellType. +/** + * + * Tile-like structure to hold tiles from datasets required for our summary. + * We can not use GeoTrellis MultibandTile because it requires all bands share a CellType. 
*/ case class ForestChangeDiagnosticTile( - loss: TreeCoverLoss#ITile, - tcd2000: TreeCoverDensityPercent2000#ITile, - isPrimaryForest: PrimaryForest#OptionalITile, - isPeatlands: Peatlands#OptionalITile, - isIntactForestLandscapes2000: IntactForestLandscapes2000#OptionalITile, - wdpaProtectedAreas: ProtectedAreas#OptionalITile, - seAsiaLandCover: SEAsiaLandCover#OptionalITile, - idnLandCover: IndonesiaLandCover#OptionalITile, - isSoyPlantedArea: SoyPlantedAreas#OptionalITile, - idnForestArea: IndonesiaForestArea#OptionalITile, - isIDNForestMoratorium: IndonesiaForestMoratorium#OptionalITile, - prodesAmazonLossYear: ProdesAmazonLossYear#OptionalITile, - prodesCerradoLossYear: ProdesCerradoLossYear#OptionalITile, - braBiomes: BrazilBiomes#OptionalITile, - isPlantation: PlantedForestsBool#OptionalITile, - gfwProCoverage: GFWProCoverage#OptionalITile, - argOTBN: ArgOTBN#OptionalITile -) extends CellGrid[Int] { + loss: TreeCoverLoss#ITile, + tcd2000: TreeCoverDensityPercent2000#ITile, + isPrimaryForest: PrimaryForest#OptionalITile, + isPeatlands: Peatlands#OptionalITile, + isIntactForestLandscapes2000: IntactForestLandscapes2000#OptionalITile, + wdpaProtectedAreas: ProtectedAreas#OptionalITile, + seAsiaLandCover: SEAsiaLandCover#OptionalITile, + idnLandCover: IndonesiaLandCover#OptionalITile, + isSoyPlantedArea: SoyPlantedAreas#OptionalITile, + idnForestArea: IndonesiaForestArea#OptionalITile, + isIDNForestMoratorium: IndonesiaForestMoratorium#OptionalITile, + prodesLossYear: ProdesLossYear#OptionalITile, + braBiomes: BrazilBiomes#OptionalITile, + isPlantation: PlantedForestsBool#OptionalITile, + gfwProCoverage: GFWProCoverage#OptionalITile, + argOTBN: ArgOTBN#OptionalITile + ) extends CellGrid[Int] { def cellType: CellType = loss.cellType diff --git a/src/test/resources/palm-32-fcd-output/part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv b/src/test/resources/palm-32-fcd-output/part-00000-4a23d5d0-56c1-4840-8264-9b4a48a83e9c-c000.csv similarity index 72% rename 
from src/test/resources/palm-32-fcd-output/part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv rename to src/test/resources/palm-32-fcd-output/part-00000-4a23d5d0-56c1-4840-8264-9b4a48a83e9c-c000.csv index 5d76cb71..23a5d78c 100644 --- a/src/test/resources/palm-32-fcd-output/part-00000-16db04de-3864-40aa-98a5-c6ca58e882ce-c000.csv +++ b/src/test/resources/palm-32-fcd-output/part-00000-4a23d5d0-56c1-4840-8264-9b4a48a83e9c-c000.csv @@ -1,2 +1,2 @@ -list_id location_id status_code location_error tree_cover_loss_total_yearly tree_cover_loss_primary_forest_yearly tree_cover_loss_peat_yearly tree_cover_loss_intact_forest_yearly tree_cover_loss_protected_areas_yearly tree_cover_loss_arg_otbn_yearly tree_cover_loss_sea_landcover_yearly tree_cover_loss_idn_landcover_yearly tree_cover_loss_soy_yearly tree_cover_loss_idn_legal_yearly tree_cover_loss_idn_forest_moratorium_yearly tree_cover_loss_prodes_amazon_yearly tree_cover_loss_prodes_cerrado_yearly tree_cover_loss_prodes_amazon_wdpa_yearly tree_cover_loss_prodes_cerrado_wdpa_yearly tree_cover_loss_prodes_amazon_primary_forest_yearly tree_cover_loss_prodes_cerrado_primary_forest_yearly tree_cover_loss_brazil_biomes_yearly tree_cover_extent_total tree_cover_extent_primary_forest tree_cover_extent_protected_areas tree_cover_extent_peat tree_cover_extent_intact_forest natural_habitat_primary natural_habitat_intact_forest total_area protected_areas_area peat_area arg_otbn_area brazil_biomes idn_legal_area sea_landcover_area idn_landcover_area idn_forest_moratorium_area south_america_presence legal_amazon_presence brazil_biomes_presence cerrado_biome_presence southeast_asia_presence indonesia_presence argentina_presence commodity_value_forest_extent commodity_value_peat commodity_value_protected_areas commodity_threat_deforestation commodity_threat_peat commodity_threat_protected_areas commodity_threat_fires -1 31 2 
{"2001":1021.7622,"2002":851.014,"2003":310.1835,"2004":2169.8398,"2005":2325.3843,"2006":4162.4968,"2007":2968.7863,"2008":4015.4403,"2009":2002.9194,"2010":1173.7001,"2011":1703.6902,"2012":2838.0498,"2013":1841.7568,"2014":2468.7732,"2015":2028.9672,"2016":3344.8135,"2017":1026.7609,"2018":525.5327,"2019":618.7052,"2020":924.699,"2021":857.8225,"2022":560.0482} {"2001":154.8617,"2002":306.7253,"2003":92.3781,"2004":717.7405,"2005":1202.6952,"2006":1831.5766,"2007":1668.2764,"2008":1753.2317,"2009":797.282,"2010":454.5023,"2011":872.3613,"2012":1251.8543,"2013":1083.6799,"2014":1290.2177,"2015":1360.2574,"2016":2313.5001,"2017":286.2809,"2018":159.8557,"2019":162.3929,"2020":134.2652,"2021":167.4697,"2022":133.6506} {"2001":557.4251,"2002":236.2539,"2003":71.8566,"2004":741.25,"2005":957.52,"2006":1229.3335,"2007":1037.5018,"2008":891.235,"2009":486.4665,"2010":363.5759,"2011":411.9212,"2012":1078.9246,"2013":862.5621,"2014":974.783,"2015":942.4571,"2016":1472.8429,"2017":211.3403,"2018":144.7173,"2019":148.7917,"2020":142.3323,"2021":122.7372,"2022":94.914} {} {"2001":42.2692,"2002":228.1732,"2003":11.3743,"2004":3.9196,"2005":1.614,"2006":15.3711,"2007":4.4576,"2008":2.8437,"2009":4.765,"2010":7.9931,"2011":10.7597,"2012":8.1466,"2013":0.1537,"2014":8.5307,"2015":18.6758,"2016":139.2616,"2017":10.7596,"2018":0.3843,"2019":0.2306,"2020":0.0,"2021":0.0769,"2022":0.0} {} {"Rubber plantation":{"2001":3.0745,"2002":16.5256,"2003":36.0493,"2004":66.1791,"2005":73.4812,"2006":25.9797,"2007":5.9184,"2008":56.571,"2009":47.7317,"2010":33.3581,"2011":21.9825,"2012":52.9583,"2013":11.9137,"2014":42.2742,"2015":34.2038,"2016":63.4883,"2017":10.6839,"2018":24.4423,"2019":22.1363,"2020":10.4533,"2021":25.826,"2022":26.1332},"Secondary 
forest":{"2001":240.1012,"2002":352.6874,"2003":51.186,"2004":522.8408,"2005":879.6014,"2006":1310.6826,"2007":981.6686,"2008":756.8744,"2009":359.2934,"2010":232.485,"2011":575.4717,"2012":1110.4372,"2013":787.2514,"2014":772.2979,"2015":966.528,"2016":1571.8466,"2017":149.9382,"2018":89.3794,"2019":136.8781,"2020":121.8915,"2021":68.6318,"2022":99.681},"Agriculture":{"2001":3.151,"2002":9.1452,"2003":5.4563,"2004":53.8715,"2005":30.3561,"2006":22.9009,"2007":6.5323,"2008":10.9893,"2009":159.7649,"2010":38.7323,"2011":100.4403,"2012":104.3592,"2013":15.3698,"2014":35.8124,"2015":19.6734,"2016":38.1942,"2017":19.2886,"2018":10.5282,"2019":11.2197,"2020":7.9922,"2021":12.1419,"2022":9.2218},"Oil palm plantation":{"2001":389.5357,"2002":222.339,"2003":103.9797,"2004":96.4524,"2005":67.8614,"2006":368.7244,"2007":440.2632,"2008":428.9814,"2009":151.7946,"2010":184.5942,"2011":113.5139,"2012":263.0128,"2013":147.9443,"2014":88.3878,"2015":58.1061,"2016":70.7105,"2017":44.5029,"2018":31.2823,"2019":233.0475,"2020":526.052,"2021":395.9972,"2022":105.2237},"Swamp":{"2001":265.2372,"2002":112.4372,"2003":38.2726,"2004":648.495,"2005":548.2747,"2006":855.4703,"2007":1129.3025,"2008":2086.08,"2009":484.4962,"2010":300.1085,"2011":526.378,"2012":478.8799,"2013":482.4034,"2014":742.3953,"2015":446.518,"2016":539.8938,"2017":620.8959,"2018":204.1215,"2019":105.5176,"2020":122.2736,"2021":197.9767,"2022":240.6287},"Settlements":{"2001":0.1537,"2002":0.9992,"2003":0.0,"2004":0.6918,"2005":0.1537,"2006":1.1529,"2007":1.1529,"2008":0.538,"2009":1.0761,"2010":0.8455,"2011":1.1529,"2012":0.8454,"2013":0.0,"2014":0.6918,"2015":0.1537,"2016":0.2306,"2017":0.3843,"2018":0.0,"2019":0.1537,"2020":1.1529,"2021":1.3067,"2022":1.691},"Grassland/shrub":{"2001":59.3337,"2002":89.231,"2003":37.5821,"2004":445.7701,"2005":432.4583,"2006":514.3995,"2007":235.9463,"2008":500.7963,"2009":334.6362,"2010":269.6786,"2011":186.2981,"2012":378.8895,"2013":330.4736,"2014":424.3189,"2015":165.2413,"2016":1
51.5619,"2017":77.7013,"2018":84.6964,"2019":29.59,"2020":91.3842,"2021":53.8004,"2022":41.7326},"Primary forest":{"2001":41.1934,"2002":30.6653,"2003":13.68,"2004":98.6793,"2005":209.8123,"2006":379.429,"2007":115.8962,"2008":96.2208,"2009":368.2156,"2010":47.8819,"2011":42.0413,"2012":228.795,"2013":26.1305,"2014":255.8481,"2015":270.3755,"2016":823.8133,"2017":81.5399,"2018":47.9595,"2019":64.4845,"2020":22.7495,"2021":71.7856,"2022":5.7642},"Water bodies":{"2001":0.8454,"2002":0.0768,"2003":0.0,"2004":0.1537,"2005":0.0,"2006":0.0,"2007":0.0769,"2008":0.0,"2009":0.0,"2010":0.1537,"2011":0.2306,"2012":0.6916,"2013":0.6917,"2014":0.6148,"2015":0.0,"2016":0.2306,"2017":0.0768,"2018":0.0,"2019":0.0,"2020":0.0,"2021":0.2305,"2022":0.1537},"Mixed tree crops":{"2001":19.1363,"2002":16.9073,"2003":23.9776,"2004":236.7062,"2005":83.3852,"2006":683.7575,"2007":52.029,"2008":78.3891,"2009":95.9108,"2010":65.8624,"2011":136.1808,"2012":219.1809,"2013":39.5784,"2014":106.132,"2015":68.1674,"2016":84.8439,"2017":21.749,"2018":33.1229,"2019":15.6777,"2020":20.7498,"2021":30.1257,"2022":29.8185}} {"Bare 
land":{"2001":3.8428,"2002":35.2766,"2003":5.3801,"2004":14.4491,"2005":17.6005,"2006":39.8116,"2007":99.1447,"2008":141.5687,"2009":59.9482,"2010":20.7508,"2011":136.3415,"2012":129.3478,"2013":94.2991,"2014":83.0794,"2015":280.0642,"2016":735.1371,"2017":28.9729,"2018":36.1198,"2019":8.3774,"2020":7.1477,"2021":8.0699,"2022":2.3824},"Mining":{"2001":7.301,"2002":2.7666,"2003":5.2258,"2004":11.9889,"2005":15.2172,"2006":9.1456,"2007":7.6082,"2008":34.8914,"2009":16.9072,"2010":8.9918,"2011":12.4502,"2012":29.5112,"2013":1.0759,"2014":16.8304,"2015":2.5362,"2016":1.9982,"2017":1.7676,"2018":0.7685,"2019":0.3074,"2020":0.4611,"2021":1.7676,"2022":3.5353},"Settlement":{"2001":30.2802,"2002":84.4598,"2003":6.7627,"2004":5.226,"2005":1.9982,"2006":15.5239,"2007":5.3029,"2008":146.7178,"2009":9.1456,"2010":5.9944,"2011":8.2999,"2012":20.9038,"2013":6.4557,"2014":10.4519,"2015":14.0641,"2016":20.5962,"2017":9.6834,"2018":5.9946,"2019":5.6871,"2020":7.5318,"2021":7.5314,"2022":7.7622},"Secondary 
forest":{"2001":14.8329,"2002":34.5077,"2003":10.3753,"2004":63.4026,"2005":86.5381,"2006":58.5628,"2007":81.0051,"2008":222.8806,"2009":92.1507,"2010":46.96,"2011":105.6723,"2012":258.1458,"2013":358.5935,"2014":604.2224,"2015":692.3818,"2016":1208.3837,"2017":259.4575,"2018":110.7479,"2019":151.2503,"2020":92.763,"2021":119.8204,"2022":109.8252},"Agriculture":{"2001":87.6883,"2002":42.4224,"2003":18.9819,"2004":289.878,"2005":266.1325,"2006":746.3828,"2007":376.2672,"2008":177.9118,"2009":282.8805,"2010":120.1185,"2011":271.2771,"2012":638.4726,"2013":155.6217,"2014":248.7641,"2015":220.5643,"2016":382.8723,"2017":105.1299,"2018":55.5624,"2019":44.6495,"2020":56.5613,"2021":42.3446,"2022":44.4967},"Swamp":{"2001":110.6023,"2002":161.6235,"2003":30.1276,"2004":349.3156,"2005":345.4651,"2006":346.3096,"2007":162.935,"2008":218.7309,"2009":146.9478,"2010":95.2199,"2011":132.4202,"2012":382.8885,"2013":159.6257,"2014":317.4138,"2015":296.5912,"2016":418.5593,"2017":115.2812,"2018":74.3201,"2019":85.1564,"2020":78.4694,"2021":128.1948,"2022":88.1532},"Grassland/shrub":{"2001":4.9185,"2002":20.2891,"2003":11.7584,"2004":38.7334,"2005":22.748,"2006":135.7978,"2007":15.2937,"2008":74.7011,"2009":35.6594,"2010":20.4429,"2011":50.7993,"2012":105.518,"2013":11.4509,"2014":46.2648,"2015":55.7949,"2016":62.8662,"2017":264.4497,"2018":34.5065,"2019":9.8372,"2020":5.1492,"2021":6.9934,"2022":55.8711},"Estate crop plantation":{"2001":759.8369,"2002":469.6682,"2003":221.0338,"2004":1396.0776,"2005":1569.454,"2006":2808.3496,"2007":2218.0015,"2008":2990.2754,"2009":1359.0495,"2010":854.7606,"2011":983.3553,"2012":1269.5731,"2013":1053.2509,"2014":1138.9796,"2015":466.1251,"2016":509.328,"2017":239.7129,"2018":205.5913,"2019":312.8252,"2020":676.3081,"2021":539.8723,"2022":247.7148},"Body of 
water":{"2001":2.4593,"2002":0.0,"2003":0.538,"2004":0.7685,"2005":0.2306,"2006":2.6132,"2007":3.228,"2008":7.7625,"2009":0.2306,"2010":0.4611,"2011":3.0743,"2012":3.689,"2013":1.3834,"2014":2.7668,"2015":0.8454,"2016":5.0725,"2017":2.3057,"2018":1.9215,"2019":0.6148,"2020":0.3074,"2021":3.228,"2022":0.3074}} {} {"Other Utilization Area":{"2001":712.0267,"2002":482.1867,"2003":221.5682,"2004":1414.5116,"2005":1126.5942,"2006":2837.7298,"2007":1853.8397,"2008":3013.7624,"2009":1165.5631,"2010":833.4598,"2011":1098.1437,"2012":1865.7614,"2013":971.0994,"2014":1259.078,"2015":657.4037,"2016":999.5492,"2017":622.2031,"2018":279.4429,"2019":376.9137,"2020":705.8893,"2021":595.4311,"2022":299.8162},"Production Forest":{"2001":113.1434,"2002":80.4763,"2003":7.6858,"2004":26.0567,"2005":48.7316,"2006":183.5469,"2007":84.3149,"2008":147.2649,"2009":84.4729,"2010":56.57,"2011":110.1392,"2012":156.3372,"2013":49.1136,"2014":173.4782,"2015":154.1063,"2016":334.2621,"2017":35.1256,"2018":10.4532,"2019":17.4472,"2020":24.9028,"2021":17.5241,"2022":26.3625},"Converted Production Forest":{"2001":151.8635,"2002":60.1778,"2003":69.0172,"2004":724.5834,"2005":1148.2139,"2006":1123.3127,"2007":1023.561,"2008":844.268,"2009":747.8878,"2010":275.2161,"2011":481.5731,"2012":804.1156,"2013":820.0067,"2014":1024.9196,"2015":1197.936,"2016":1866.668,"2017":356.367,"2018":233.3308,"2019":223.499,"2020":193.5994,"2021":241.5624,"2022":233.5622},"Sanctuary Reserves/Nature Conservation Area":{"2001":42.2692,"2002":228.1732,"2003":11.3743,"2004":3.9196,"2005":1.614,"2006":15.3711,"2007":4.4576,"2008":2.8437,"2009":4.765,"2010":7.9931,"2011":10.7597,"2012":8.1466,"2013":0.1537,"2014":8.5307,"2015":18.6758,"2016":139.2616,"2017":10.7596,"2018":0.3843,"2019":0.2306,"2020":0.0,"2021":0.0769,"2022":0.0}} 
{"2001":85.0014,"2002":248.2325,"2003":18.829,"2004":97.8293,"2005":96.2941,"2006":176.9875,"2007":138.7928,"2008":129.4126,"2009":109.4342,"2010":65.0144,"2011":100.5959,"2012":428.132,"2013":566.3779,"2014":467.2467,"2015":304.2577,"2016":712.6515,"2017":145.3232,"2018":56.2574,"2019":82.8502,"2020":54.0272,"2021":24.2097,"2022":14.7553} {} {} {} {} {} {} {} 76338.8266 34513.678 6014.0986 23301.5138 0.0 34530.8164 0.0 125583.7284 6614.0107 31984.9079 {} {} {"Other Utilization Area":81822.5991,"Production Forest":4848.8827,"Converted Production Forest":28600.5448,"Sanctuary Reserves/Nature Conservation Area":6614.0107} {"Rubber plantation":3542.3364,"Secondary forest":24896.0268,"Agriculture":2763.3729,"Oil palm plantation":24398.5485,"Swamp":29043.6031,"Settlements":851.2415,"Grassland/shrub":22752.6953,"Primary forest":8261.1709,"Water bodies":3133.0348,"Mixed tree crops":5941.6982} {"Bare land":2902.1353,"Mining":1392.3433,"Settlement":5798.0268,"Secondary forest":21966.5842,"Agriculture":9963.5593,"Swamp":7625.7847,"Grassland/shrub":2875.049,"Estate crop plantation":69353.0242,"Body of water":3707.2217} 13342.6717 false false false false true true false {"2002":24579.8084,"2003":24451.4643,"2004":24189.9373,"2005":24132.0676,"2006":23677.7243,"2007":23217.4612,"2008":22247.8139,"2009":21697.0839,"2010":21235.5008,"2011":20791.3746,"2012":20560.122,"2013":20057.1275,"2014":19054.2097,"2015":18742.9578,"2016":18210.5965,"2017":17209.7962,"2018":15452.9753,"2019":15189.6774,"2020":15071.8608,"2021":14916.0005} {"2002":31984.9079,"2003":31984.9079,"2004":31984.9079,"2005":31984.9079,"2006":31984.9079,"2007":31984.9079,"2008":31984.9079,"2009":31984.9079,"2010":31984.9079,"2011":31984.9079,"2012":31984.9079,"2013":31984.9079,"2014":31984.9079,"2015":31984.9079,"2016":31984.9079,"2017":31984.9079,"2018":31984.9079,"2019":31984.9079,"2020":31984.9079,"2021":31984.9079} 
{"2002":6614.0107,"2003":6614.0107,"2004":6614.0107,"2005":6614.0107,"2006":6614.0107,"2007":6614.0107,"2008":6614.0107,"2009":6614.0107,"2010":6614.0107,"2011":6614.0107,"2012":6614.0107,"2013":6614.0107,"2014":6614.0107,"2015":6614.0107,"2016":6614.0107,"2017":6614.0107,"2018":6614.0107,"2019":6614.0107,"2020":6614.0107,"2021":6614.0107} {"2002":389.8711,"2003":319.3967,"2004":512.213,"2005":914.6063,"2006":1429.9104,"2007":1520.3773,"2008":1012.313,"2009":905.7094,"2010":675.3788,"2011":734.247,"2012":1505.9123,"2013":1314.1698,"2014":843.6132,"2015":1533.1615,"2016":2757.6213,"2017":2020.1188,"2018":381.1145,"2019":273.6768,"2020":272.6016,"2021":245.5511} {"2002":13363.7925,"2003":13309.3807,"2004":13472.9171,"2005":13694.9388,"2006":14063.9863,"2007":14195.4089,"2008":13789.4768,"2009":13613.3257,"2010":13540.0855,"2011":13478.2203,"2012":13889.142,"2013":13891.8339,"2014":13604.4896,"2015":14052.9405,"2016":14710.2098,"2017":14207.349,"2018":13390.077,"2019":13385.3927,"2020":13385.8532,"2021":13355.5725} {"2002":222.102,"2003":200.4294,"2004":9.6067,"2005":0.4611,"2006":3.7659,"2007":3.8427,"2008":0.1537,"2009":1.2297,"2010":2.8437,"2011":2.9205,"2012":3.2279,"2013":1.9982,"2014":0.3843,"2015":13.7571,"2016":74.1656,"2017":63.0214,"2018":2.2287,"2019":0.0,"2020":0.0,"2021":0.0} {} +list_id location_id status_code location_error tree_cover_loss_total_yearly tree_cover_loss_primary_forest_yearly tree_cover_loss_peat_yearly tree_cover_loss_intact_forest_yearly tree_cover_loss_protected_areas_yearly tree_cover_loss_arg_otbn_yearly tree_cover_loss_sea_landcover_yearly tree_cover_loss_idn_landcover_yearly tree_cover_loss_soy_yearly tree_cover_loss_idn_legal_yearly tree_cover_loss_idn_forest_moratorium_yearly tree_cover_loss_prodes_yearly tree_cover_loss_prodes_wdpa_yearly tree_cover_loss_prodes_primary_forest_yearly tree_cover_loss_brazil_biomes_yearly tree_cover_extent_total tree_cover_extent_primary_forest tree_cover_extent_protected_areas 
tree_cover_extent_peat tree_cover_extent_intact_forest natural_habitat_primary natural_habitat_intact_forest total_area protected_areas_area peat_area arg_otbn_area brazil_biomes idn_legal_area sea_landcover_area idn_landcover_area idn_forest_moratorium_area south_america_presence legal_amazon_presence brazil_biomes_presence cerrado_biome_presence southeast_asia_presence indonesia_presence argentina_presence commodity_value_forest_extent commodity_value_peat commodity_value_protected_areas commodity_threat_deforestation commodity_threat_peat commodity_threat_protected_areas commodity_threat_fires +1 31 2 {"2001":1021.7622,"2002":851.014,"2003":310.1835,"2004":2169.8398,"2005":2325.3843,"2006":4162.4968,"2007":2968.7863,"2008":4015.4403,"2009":2002.9194,"2010":1173.7001,"2011":1703.6902,"2012":2838.0498,"2013":1841.7568,"2014":2468.7732,"2015":2028.9672,"2016":3344.8135,"2017":1026.7609,"2018":525.5327,"2019":618.7052,"2020":924.699,"2021":857.8225,"2022":560.0482} {"2001":154.8617,"2002":306.7253,"2003":92.3781,"2004":717.7405,"2005":1202.6952,"2006":1831.5766,"2007":1668.2764,"2008":1753.2317,"2009":797.282,"2010":454.5023,"2011":872.3613,"2012":1251.8543,"2013":1083.6799,"2014":1290.2177,"2015":1360.2574,"2016":2313.5001,"2017":286.2809,"2018":159.8557,"2019":162.3929,"2020":134.2652,"2021":167.4697,"2022":133.6506} {"2001":557.4251,"2002":236.2539,"2003":71.8566,"2004":741.25,"2005":957.52,"2006":1229.3335,"2007":1037.5018,"2008":891.235,"2009":486.4665,"2010":363.5759,"2011":411.9212,"2012":1078.9246,"2013":862.5621,"2014":974.783,"2015":942.4571,"2016":1472.8429,"2017":211.3403,"2018":144.7173,"2019":148.7917,"2020":142.3323,"2021":122.7372,"2022":94.914} {} 
{"2001":42.2692,"2002":228.1732,"2003":11.3743,"2004":3.9196,"2005":1.614,"2006":15.3711,"2007":4.4576,"2008":2.8437,"2009":4.765,"2010":7.9931,"2011":10.7597,"2012":8.1466,"2013":0.1537,"2014":8.5307,"2015":18.6758,"2016":139.2616,"2017":10.7596,"2018":0.3843,"2019":0.2306,"2020":0.0,"2021":0.0769,"2022":0.0} {} {"Rubber plantation":{"2001":3.0745,"2002":16.5256,"2003":36.0493,"2004":66.1791,"2005":73.4812,"2006":25.9797,"2007":5.9184,"2008":56.571,"2009":47.7317,"2010":33.3581,"2011":21.9825,"2012":52.9583,"2013":11.9137,"2014":42.2742,"2015":34.2038,"2016":63.4883,"2017":10.6839,"2018":24.4423,"2019":22.1363,"2020":10.4533,"2021":25.826,"2022":26.1332},"Secondary forest":{"2001":240.1012,"2002":352.6874,"2003":51.186,"2004":522.8408,"2005":879.6014,"2006":1310.6826,"2007":981.6686,"2008":756.8744,"2009":359.2934,"2010":232.485,"2011":575.4717,"2012":1110.4372,"2013":787.2514,"2014":772.2979,"2015":966.528,"2016":1571.8466,"2017":149.9382,"2018":89.3794,"2019":136.8781,"2020":121.8915,"2021":68.6318,"2022":99.681},"Agriculture":{"2001":3.151,"2002":9.1452,"2003":5.4563,"2004":53.8715,"2005":30.3561,"2006":22.9009,"2007":6.5323,"2008":10.9893,"2009":159.7649,"2010":38.7323,"2011":100.4403,"2012":104.3592,"2013":15.3698,"2014":35.8124,"2015":19.6734,"2016":38.1942,"2017":19.2886,"2018":10.5282,"2019":11.2197,"2020":7.9922,"2021":12.1419,"2022":9.2218},"Oil palm 
plantation":{"2001":389.5357,"2002":222.339,"2003":103.9797,"2004":96.4524,"2005":67.8614,"2006":368.7244,"2007":440.2632,"2008":428.9814,"2009":151.7946,"2010":184.5942,"2011":113.5139,"2012":263.0128,"2013":147.9443,"2014":88.3878,"2015":58.1061,"2016":70.7105,"2017":44.5029,"2018":31.2823,"2019":233.0475,"2020":526.052,"2021":395.9972,"2022":105.2237},"Swamp":{"2001":265.2372,"2002":112.4372,"2003":38.2726,"2004":648.495,"2005":548.2747,"2006":855.4703,"2007":1129.3025,"2008":2086.08,"2009":484.4962,"2010":300.1085,"2011":526.378,"2012":478.8799,"2013":482.4034,"2014":742.3953,"2015":446.518,"2016":539.8938,"2017":620.8959,"2018":204.1215,"2019":105.5176,"2020":122.2736,"2021":197.9767,"2022":240.6287},"Settlements":{"2001":0.1537,"2002":0.9992,"2003":0.0,"2004":0.6918,"2005":0.1537,"2006":1.1529,"2007":1.1529,"2008":0.538,"2009":1.0761,"2010":0.8455,"2011":1.1529,"2012":0.8454,"2013":0.0,"2014":0.6918,"2015":0.1537,"2016":0.2306,"2017":0.3843,"2018":0.0,"2019":0.1537,"2020":1.1529,"2021":1.3067,"2022":1.691},"Grassland/shrub":{"2001":59.3337,"2002":89.231,"2003":37.5821,"2004":445.7701,"2005":432.4583,"2006":514.3995,"2007":235.9463,"2008":500.7963,"2009":334.6362,"2010":269.6786,"2011":186.2981,"2012":378.8895,"2013":330.4736,"2014":424.3189,"2015":165.2413,"2016":151.5619,"2017":77.7013,"2018":84.6964,"2019":29.59,"2020":91.3842,"2021":53.8004,"2022":41.7326},"Primary forest":{"2001":41.1934,"2002":30.6653,"2003":13.68,"2004":98.6793,"2005":209.8123,"2006":379.429,"2007":115.8962,"2008":96.2208,"2009":368.2156,"2010":47.8819,"2011":42.0413,"2012":228.795,"2013":26.1305,"2014":255.8481,"2015":270.3755,"2016":823.8133,"2017":81.5399,"2018":47.9595,"2019":64.4845,"2020":22.7495,"2021":71.7856,"2022":5.7642},"Water 
bodies":{"2001":0.8454,"2002":0.0768,"2003":0.0,"2004":0.1537,"2005":0.0,"2006":0.0,"2007":0.0769,"2008":0.0,"2009":0.0,"2010":0.1537,"2011":0.2306,"2012":0.6916,"2013":0.6917,"2014":0.6148,"2015":0.0,"2016":0.2306,"2017":0.0768,"2018":0.0,"2019":0.0,"2020":0.0,"2021":0.2305,"2022":0.1537},"Mixed tree crops":{"2001":19.1363,"2002":16.9073,"2003":23.9776,"2004":236.7062,"2005":83.3852,"2006":683.7575,"2007":52.029,"2008":78.3891,"2009":95.9108,"2010":65.8624,"2011":136.1808,"2012":219.1809,"2013":39.5784,"2014":106.132,"2015":68.1674,"2016":84.8439,"2017":21.749,"2018":33.1229,"2019":15.6777,"2020":20.7498,"2021":30.1257,"2022":29.8185}} {"Bare land":{"2001":3.8428,"2002":35.2766,"2003":5.3801,"2004":14.4491,"2005":17.6005,"2006":39.8116,"2007":99.1447,"2008":141.5687,"2009":59.9482,"2010":20.7508,"2011":136.3415,"2012":129.3478,"2013":94.2991,"2014":83.0794,"2015":280.0642,"2016":735.1371,"2017":28.9729,"2018":36.1198,"2019":8.3774,"2020":7.1477,"2021":8.0699,"2022":2.3824},"Mining":{"2001":7.301,"2002":2.7666,"2003":5.2258,"2004":11.9889,"2005":15.2172,"2006":9.1456,"2007":7.6082,"2008":34.8914,"2009":16.9072,"2010":8.9918,"2011":12.4502,"2012":29.5112,"2013":1.0759,"2014":16.8304,"2015":2.5362,"2016":1.9982,"2017":1.7676,"2018":0.7685,"2019":0.3074,"2020":0.4611,"2021":1.7676,"2022":3.5353},"Settlement":{"2001":30.2802,"2002":84.4598,"2003":6.7627,"2004":5.226,"2005":1.9982,"2006":15.5239,"2007":5.3029,"2008":146.7178,"2009":9.1456,"2010":5.9944,"2011":8.2999,"2012":20.9038,"2013":6.4557,"2014":10.4519,"2015":14.0641,"2016":20.5962,"2017":9.6834,"2018":5.9946,"2019":5.6871,"2020":7.5318,"2021":7.5314,"2022":7.7622},"Secondary 
forest":{"2001":14.8329,"2002":34.5077,"2003":10.3753,"2004":63.4026,"2005":86.5381,"2006":58.5628,"2007":81.0051,"2008":222.8806,"2009":92.1507,"2010":46.96,"2011":105.6723,"2012":258.1458,"2013":358.5935,"2014":604.2224,"2015":692.3818,"2016":1208.3837,"2017":259.4575,"2018":110.7479,"2019":151.2503,"2020":92.763,"2021":119.8204,"2022":109.8252},"Agriculture":{"2001":87.6883,"2002":42.4224,"2003":18.9819,"2004":289.878,"2005":266.1325,"2006":746.3828,"2007":376.2672,"2008":177.9118,"2009":282.8805,"2010":120.1185,"2011":271.2771,"2012":638.4726,"2013":155.6217,"2014":248.7641,"2015":220.5643,"2016":382.8723,"2017":105.1299,"2018":55.5624,"2019":44.6495,"2020":56.5613,"2021":42.3446,"2022":44.4967},"Swamp":{"2001":110.6023,"2002":161.6235,"2003":30.1276,"2004":349.3156,"2005":345.4651,"2006":346.3096,"2007":162.935,"2008":218.7309,"2009":146.9478,"2010":95.2199,"2011":132.4202,"2012":382.8885,"2013":159.6257,"2014":317.4138,"2015":296.5912,"2016":418.5593,"2017":115.2812,"2018":74.3201,"2019":85.1564,"2020":78.4694,"2021":128.1948,"2022":88.1532},"Grassland/shrub":{"2001":4.9185,"2002":20.2891,"2003":11.7584,"2004":38.7334,"2005":22.748,"2006":135.7978,"2007":15.2937,"2008":74.7011,"2009":35.6594,"2010":20.4429,"2011":50.7993,"2012":105.518,"2013":11.4509,"2014":46.2648,"2015":55.7949,"2016":62.8662,"2017":264.4497,"2018":34.5065,"2019":9.8372,"2020":5.1492,"2021":6.9934,"2022":55.8711},"Estate crop plantation":{"2001":759.8369,"2002":469.6682,"2003":221.0338,"2004":1396.0776,"2005":1569.454,"2006":2808.3496,"2007":2218.0015,"2008":2990.2754,"2009":1359.0495,"2010":854.7606,"2011":983.3553,"2012":1269.5731,"2013":1053.2509,"2014":1138.9796,"2015":466.1251,"2016":509.328,"2017":239.7129,"2018":205.5913,"2019":312.8252,"2020":676.3081,"2021":539.8723,"2022":247.7148},"Body of 
water":{"2001":2.4593,"2002":0.0,"2003":0.538,"2004":0.7685,"2005":0.2306,"2006":2.6132,"2007":3.228,"2008":7.7625,"2009":0.2306,"2010":0.4611,"2011":3.0743,"2012":3.689,"2013":1.3834,"2014":2.7668,"2015":0.8454,"2016":5.0725,"2017":2.3057,"2018":1.9215,"2019":0.6148,"2020":0.3074,"2021":3.228,"2022":0.3074}} {} {"Other Utilization Area":{"2001":712.0267,"2002":482.1867,"2003":221.5682,"2004":1414.5116,"2005":1126.5942,"2006":2837.7298,"2007":1853.8397,"2008":3013.7624,"2009":1165.5631,"2010":833.4598,"2011":1098.1437,"2012":1865.7614,"2013":971.0994,"2014":1259.078,"2015":657.4037,"2016":999.5492,"2017":622.2031,"2018":279.4429,"2019":376.9137,"2020":705.8893,"2021":595.4311,"2022":299.8162},"Production Forest":{"2001":113.1434,"2002":80.4763,"2003":7.6858,"2004":26.0567,"2005":48.7316,"2006":183.5469,"2007":84.3149,"2008":147.2649,"2009":84.4729,"2010":56.57,"2011":110.1392,"2012":156.3372,"2013":49.1136,"2014":173.4782,"2015":154.1063,"2016":334.2621,"2017":35.1256,"2018":10.4532,"2019":17.4472,"2020":24.9028,"2021":17.5241,"2022":26.3625},"Converted Production Forest":{"2001":151.8635,"2002":60.1778,"2003":69.0172,"2004":724.5834,"2005":1148.2139,"2006":1123.3127,"2007":1023.561,"2008":844.268,"2009":747.8878,"2010":275.2161,"2011":481.5731,"2012":804.1156,"2013":820.0067,"2014":1024.9196,"2015":1197.936,"2016":1866.668,"2017":356.367,"2018":233.3308,"2019":223.499,"2020":193.5994,"2021":241.5624,"2022":233.5622},"Sanctuary Reserves/Nature Conservation Area":{"2001":42.2692,"2002":228.1732,"2003":11.3743,"2004":3.9196,"2005":1.614,"2006":15.3711,"2007":4.4576,"2008":2.8437,"2009":4.765,"2010":7.9931,"2011":10.7597,"2012":8.1466,"2013":0.1537,"2014":8.5307,"2015":18.6758,"2016":139.2616,"2017":10.7596,"2018":0.3843,"2019":0.2306,"2020":0.0,"2021":0.0769,"2022":0.0}} 
{"2001":85.0014,"2002":248.2325,"2003":18.829,"2004":97.8293,"2005":96.2941,"2006":176.9875,"2007":138.7928,"2008":129.4126,"2009":109.4342,"2010":65.0144,"2011":100.5959,"2012":428.132,"2013":566.3779,"2014":467.2467,"2015":304.2577,"2016":712.6515,"2017":145.3232,"2018":56.2574,"2019":82.8502,"2020":54.0272,"2021":24.2097,"2022":14.7553} {} {} {} {} 76338.8266 34513.678 6014.0986 23301.5138 0.0 34530.8164 0.0 125583.7284 6614.0107 31984.9079 {} {} {"Other Utilization Area":81822.5991,"Production Forest":4848.8827,"Converted Production Forest":28600.5448,"Sanctuary Reserves/Nature Conservation Area":6614.0107} {"Rubber plantation":3542.3364,"Secondary forest":24896.0268,"Agriculture":2763.3729,"Oil palm plantation":24398.5485,"Swamp":29043.6031,"Settlements":851.2415,"Grassland/shrub":22752.6953,"Primary forest":8261.1709,"Water bodies":3133.0348,"Mixed tree crops":5941.6982} {"Bare land":2902.1353,"Mining":1392.3433,"Settlement":5798.0268,"Secondary forest":21966.5842,"Agriculture":9963.5593,"Swamp":7625.7847,"Grassland/shrub":2875.049,"Estate crop plantation":69353.0242,"Body of water":3707.2217} 13342.6717 false false false false true true false {"2002":24579.8084,"2003":24451.4643,"2004":24189.9373,"2005":24132.0676,"2006":23677.7243,"2007":23217.4612,"2008":22247.8139,"2009":21697.0839,"2010":21235.5008,"2011":20791.3746,"2012":20560.122,"2013":20057.1275,"2014":19054.2097,"2015":18742.9578,"2016":18210.5965,"2017":17209.7962,"2018":15452.9753,"2019":15189.6774,"2020":15071.8608,"2021":14916.0005} {"2002":31984.9079,"2003":31984.9079,"2004":31984.9079,"2005":31984.9079,"2006":31984.9079,"2007":31984.9079,"2008":31984.9079,"2009":31984.9079,"2010":31984.9079,"2011":31984.9079,"2012":31984.9079,"2013":31984.9079,"2014":31984.9079,"2015":31984.9079,"2016":31984.9079,"2017":31984.9079,"2018":31984.9079,"2019":31984.9079,"2020":31984.9079,"2021":31984.9079} 
{"2002":6614.0107,"2003":6614.0107,"2004":6614.0107,"2005":6614.0107,"2006":6614.0107,"2007":6614.0107,"2008":6614.0107,"2009":6614.0107,"2010":6614.0107,"2011":6614.0107,"2012":6614.0107,"2013":6614.0107,"2014":6614.0107,"2015":6614.0107,"2016":6614.0107,"2017":6614.0107,"2018":6614.0107,"2019":6614.0107,"2020":6614.0107,"2021":6614.0107} {"2002":389.8711,"2003":319.3967,"2004":512.213,"2005":914.6063,"2006":1429.9104,"2007":1520.3773,"2008":1012.313,"2009":905.7094,"2010":675.3788,"2011":734.247,"2012":1505.9123,"2013":1314.1698,"2014":843.6132,"2015":1533.1615,"2016":2757.6213,"2017":2020.1188,"2018":381.1145,"2019":273.6768,"2020":272.6016,"2021":245.5511} {"2002":13363.7925,"2003":13309.3807,"2004":13472.9171,"2005":13694.9388,"2006":14063.9863,"2007":14195.4089,"2008":13789.4768,"2009":13613.3257,"2010":13540.0855,"2011":13478.2203,"2012":13889.142,"2013":13891.8339,"2014":13604.4896,"2015":14052.9405,"2016":14710.2098,"2017":14207.349,"2018":13390.077,"2019":13385.3927,"2020":13385.8532,"2021":13355.5725} {"2002":222.102,"2003":200.4294,"2004":9.6067,"2005":0.4611,"2006":3.7659,"2007":3.8427,"2008":0.1537,"2009":1.2297,"2010":2.8437,"2011":2.9205,"2012":3.2279,"2013":1.9982,"2014":0.3843,"2015":13.7571,"2016":74.1656,"2017":63.0214,"2018":2.2287,"2019":0.0,"2020":0.0,"2021":0.0} {} diff --git a/src/test/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticAnalysisSpec.scala b/src/test/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticAnalysisSpec.scala index dab5eab5..016450dc 100644 --- a/src/test/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticAnalysisSpec.scala +++ b/src/test/scala/org/globalforestwatch/summarystats/forest_change_diagnostic/ForestChangeDiagnosticAnalysisSpec.scala @@ -36,8 +36,7 @@ class ForestChangeDiagnosticAnalysisSpec extends TestEnvironment with DataFrameC /** Function to update expected results when this test becomes 
invalid */ def saveExpectedFcdResult(fcd: DataFrame): Unit = { - fcd - .repartition(1) + fcd.repartition(1) .write .mode(SaveMode.Overwrite) .options(ForestChangeDiagnosticExport.csvOptions) From d9391b7efe5cda91dd2a540b9a6f2c829e0db0bb Mon Sep 17 00:00:00 2001 From: manukala6 Date: Wed, 27 Sep 2023 08:40:19 -0700 Subject: [PATCH 33/33] Filter locationId before analysis --- .../summarystats/afi/AFiAnalysis.scala | 1 + .../summarystats/afi/AFiCommand.scala | 12 +++++++++--- .../globalforestwatch/summarystats/afi/AFiDF.scala | 2 -- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala index 742d7cfc..0b852204 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiAnalysis.scala @@ -48,6 +48,7 @@ object AFiAnalysis extends SummaryAnalysis { val summaryDF = AFiAnalysis.aggregateResults( AFiDF .getFeatureDataFrame(summaryRDD, spark) + .filter(!$"gadm_id".contains("null")) .withColumn( "gadm_id", when(col("location_id") =!= -1|| col("gadm_id").contains("null"), lit("") ).otherwise(col("gadm_id")) ) diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala index a636d0f0..af46c216 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiCommand.scala @@ -9,13 +9,15 @@ import org.apache.sedona.core.spatialRDD.SpatialRDD import org.globalforestwatch.config.GfwConfig import org.globalforestwatch.features._ import org.locationtech.jts.geom.Geometry +import cats.data.Validated.Valid + object AFiCommand extends SummaryCommand { val afiCommand: Opts[Unit] = Opts.subcommand( name = AFiAnalysis.name, help = "Compute summary statistics for GFW Pro 
Dashboard." - ) { + ) ( ( defaultOptions, featureFilterOptions, @@ -30,14 +32,18 @@ object AFiCommand extends SummaryCommand { runAnalysis { implicit spark => val featureRDD = ValidatedFeatureRDD(default.featureUris, default.featureType, featureFilter, default.splitFeatures) + val filteredFeatureRDD = featureRDD.filter{ + case Valid((GfwProFeatureId(_, locationId), _)) => locationId != -2 + case _ => true + } AFiAnalysis( - featureRDD, + filteredFeatureRDD, default.featureType, spark, kwargs ) } } - } + ) } diff --git a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala index cd094ae9..a1b49ac0 100644 --- a/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala +++ b/src/main/scala/org/globalforestwatch/summarystats/afi/AFiDF.scala @@ -40,7 +40,5 @@ object AFiDF extends SummaryDF { } .toDF("id", "error", "dataGroup", "data") .select($"id.*", $"error.*", $"dataGroup.*", $"data.*") - .filter($"location_id" =!= -2) - .filter(!$"gadm_id".contains("null")) } }