From aafd09328b174e1f62c64ca448372ee6460d1c8b Mon Sep 17 00:00:00 2001 From: Marcel Gerber Date: Fri, 12 Apr 2024 14:11:46 +0200 Subject: [PATCH 1/3] perf(table): optimize `tableForSelection` calls --- .../core-table/src/OwidTable.ts | 44 +++++++++++++++++++ .../grapher/src/lineCharts/LineChart.tsx | 11 ++--- .../stackedCharts/AbstractStackedChart.tsx | 11 +---- 3 files changed, 48 insertions(+), 18 deletions(-) diff --git a/packages/@ourworldindata/core-table/src/OwidTable.ts b/packages/@ourworldindata/core-table/src/OwidTable.ts index 8a65d486b0f..42afb19bce4 100644 --- a/packages/@ourworldindata/core-table/src/OwidTable.ts +++ b/packages/@ourworldindata/core-table/src/OwidTable.ts @@ -279,6 +279,50 @@ export class OwidTable extends CoreTable { ) } + dropEntitiesThatHaveNoDataInSomeColumn(columnSlugs: ColumnSlug[]): this { + const indexesByEntityName = this.rowIndicesByEntityName + + // Iterate over all entities, and remove them as we go if they have no data in some column + const entityNamesToKeep = new Set(indexesByEntityName.keys()) + + for (let i = 0; i <= columnSlugs.length; i++) { + const slug = columnSlugs[i] + const col = this.get(slug) + + // Optimization, if there are no error values in this column, we can skip this column + if (col.numErrorValues === 0) continue + + for (const entityName of entityNamesToKeep) { + const indicesForEntityName = indexesByEntityName.get(entityName) + if (!indicesForEntityName) + throw new Error("Unexpected: entity not found in index map") + + // Optimization: We don't care about the number of valid/error values, we just need + // to know if there is at least one valid value + const hasSomeValidValueForEntityInCol = + indicesForEntityName.some((index) => + isNotErrorValue(col.valuesIncludingErrorValues[index]) + ) + + // Optimization: If we find a column that this entity has no data in we can remove + // it immediately, no need to iterate over other columns also + if (!hasSomeValidValueForEntityInCol) + entityNamesToKeep.delete(entityName) + } + } + + // const entityNamesToDrop = differenceOfSets([ + // this.availableEntityNameSet, + // entityNamesToKeep, + // ]) + + return this.columnFilter( + this.entityNameSlug, + (rowEntityName) => entityNamesToKeep.has(rowEntityName as string), + `Drop entities that have no data in some column: ${columnSlugs.join(", ")}` + ) + } + private sumsByTime(columnSlug: ColumnSlug): Map { const timeValues = this.timeColumn.values const values = this.get(columnSlug).values as number[] diff --git a/packages/@ourworldindata/grapher/src/lineCharts/LineChart.tsx b/packages/@ourworldindata/grapher/src/lineCharts/LineChart.tsx index 5f490e54b5c..8d2ab30ba18 100644 --- a/packages/@ourworldindata/grapher/src/lineCharts/LineChart.tsx +++ b/packages/@ourworldindata/grapher/src/lineCharts/LineChart.tsx @@ -72,7 +72,6 @@ import { OwidTable, CoreColumn, isNotErrorValue, - BlankOwidTable, } from "@ourworldindata/core-table" import { autoDetectSeriesStrategy, @@ -312,13 +311,9 @@ export class LineChart this.yColumnSlugs ) - const groupedByEntity = table - .groupBy(table.entityNameColumn.slug) - .filter((t) => !t.hasAnyColumnNoValidValue(this.yColumnSlugs)) - - if (groupedByEntity.length === 0) return BlankOwidTable() - - table = groupedByEntity[0].concat(groupedByEntity.slice(1)) + table = table.dropEntitiesThatHaveNoDataInSomeColumn( + this.yColumnSlugs + ) } return table diff --git a/packages/@ourworldindata/grapher/src/stackedCharts/AbstractStackedChart.tsx b/packages/@ourworldindata/grapher/src/stackedCharts/AbstractStackedChart.tsx index c7821b18bce..f274633b72c 100644 --- a/packages/@ourworldindata/grapher/src/stackedCharts/AbstractStackedChart.tsx +++ b/packages/@ourworldindata/grapher/src/stackedCharts/AbstractStackedChart.tsx @@ -27,7 +27,6 @@ import { import { OwidTable, CoreColumn, - BlankOwidTable, isNotErrorValueOrEmptyCell, } from "@ourworldindata/core-table" import { @@ -103,15 +102,7 @@ export class AbstractStackedChart }) } - const groupedByEntity = table - .groupBy(table.entityNameColumn.slug) - .map((t: OwidTable) => - t.dropRowsWithErrorValuesForAnyColumn(this.yColumnSlugs) - ) - - if (groupedByEntity.length === 0) return BlankOwidTable() - - table = groupedByEntity[0].concat(groupedByEntity.slice(1)) + table = table.dropRowsWithErrorValuesForAnyColumn(this.yColumnSlugs) } return table From 0d63731941f4bf1f13f47c436b9ebd0ec0df32dd Mon Sep 17 00:00:00 2001 From: Marcel Gerber Date: Fri, 12 Apr 2024 16:14:18 +0200 Subject: [PATCH 2/3] style: add comment --- packages/@ourworldindata/core-table/src/OwidTable.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/@ourworldindata/core-table/src/OwidTable.ts b/packages/@ourworldindata/core-table/src/OwidTable.ts index 42afb19bce4..5983473bf02 100644 --- a/packages/@ourworldindata/core-table/src/OwidTable.ts +++ b/packages/@ourworldindata/core-table/src/OwidTable.ts @@ -279,6 +279,7 @@ export class OwidTable extends CoreTable { ) } + // Drop _all rows_ for an entity if there is any column that has no valid values for that entity. dropEntitiesThatHaveNoDataInSomeColumn(columnSlugs: ColumnSlug[]): this { const indexesByEntityName = this.rowIndicesByEntityName From 5ecb333378bd3e37479bed24eace9998647a328e Mon Sep 17 00:00:00 2001 From: Marcel Gerber Date: Tue, 16 Apr 2024 09:39:44 +0200 Subject: [PATCH 3/3] enhance(table): clearer transform description --- .../@ourworldindata/core-table/src/OwidTable.ts | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/packages/@ourworldindata/core-table/src/OwidTable.ts b/packages/@ourworldindata/core-table/src/OwidTable.ts index 5983473bf02..e1c224f5296 100644 --- a/packages/@ourworldindata/core-table/src/OwidTable.ts +++ b/packages/@ourworldindata/core-table/src/OwidTable.ts @@ -23,6 +23,7 @@ import { ColumnSlug, imemo, ToleranceStrategy, + differenceOfSets, } from "@ourworldindata/utils" import { Integer, @@ -312,15 +313,19 @@ export class OwidTable extends CoreTable { } } - // const entityNamesToDrop = differenceOfSets([ - // this.availableEntityNameSet, - // entityNamesToKeep, - // ]) + const entityNamesToDrop = differenceOfSets([ + this.availableEntityNameSet, + entityNamesToKeep, + ]) + const droppedEntitiesStr = + entityNamesToDrop.size > 0 + ? [...entityNamesToDrop].join(", ") + : "(None)" return this.columnFilter( this.entityNameSlug, (rowEntityName) => entityNamesToKeep.has(rowEntityName as string), - `Drop entities that have no data in some column: ${columnSlugs.join(", ")}` + `Drop entities that have no data in some column: ${columnSlugs.join(", ")}.\nDropped entities: ${droppedEntitiesStr}` ) }