Skip to content

Commit

Permalink
Update France data for Chartbook (#2492)
Browse files Browse the repository at this point in the history
  • Loading branch information
paarriagadap authored Jun 26, 2024
1 parent 70b0397 commit 7d0ee83
Show file tree
Hide file tree
Showing 47 changed files with 2,119 additions and 0 deletions.
49 changes: 49 additions & 0 deletions dag/poverty_inequality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,52 @@ steps:
data://grapher/oecd/2024-04-30/affordable_housing_database:
- data://garden/oecd/2024-04-30/affordable_housing_database

# Inequality data from France (INSEE)
# 1999 report
data://meadow/insee/2024-03-21/inequality_france_1999:
- snapshot://insee/2024-03-21/inequality_france_1999.csv
data://garden/insee/2024-03-21/inequality_france_1999:
- data://meadow/insee/2024-03-21/inequality_france_1999
# 2021 report
data://meadow/insee/2024-04-25/insee_premiere_1875:
- snapshot://insee/2024-04-25/insee_premiere_1875.xlsx
data://garden/insee/2024-04-25/insee_premiere_1875:
- data://meadow/insee/2024-04-25/insee_premiere_1875
# Key figures (inequality) - live version
data://meadow/insee/2024-04-05/inequality_france:
- snapshot://insee/2024-04-05/inequality_france.xlsx
data://garden/insee/2024-04-05/inequality_france:
- data://meadow/insee/2024-04-05/inequality_france
# Key figures (relative poverty) - live version
data://meadow/insee/2024-04-26/relative_poverty_france:
- snapshot://insee/2024-04-26/relative_poverty_france.xlsx
data://garden/insee/2024-04-26/relative_poverty_france:
- data://meadow/insee/2024-04-26/relative_poverty_france
# Interdecile ratio for net salary (version 2022)
data://meadow/insee/2024-05-23/interdecile_ratio_2022:
- snapshot://insee/2024-05-23/interdecile_ratio_2022.csv
data://garden/insee/2024-05-23/interdecile_ratio_2022:
- data://meadow/insee/2024-05-23/interdecile_ratio_2022
# Interdecile ratio for net salary (live version)
data://meadow/insee/2024-05-23/interdecile_ratio:
- snapshot://insee/2024-05-23/interdecile_ratio.csv
data://garden/insee/2024-05-23/interdecile_ratio:
- data://meadow/insee/2024-05-23/interdecile_ratio

# France historical Ginis (Concialdi, 1997)
data://meadow/chartbook/2024-04-22/concialdi:
- snapshot://chartbook/2024-04-22/concialdi.csv
data://garden/chartbook/2024-04-22/concialdi:
- data://meadow/chartbook/2024-04-22/concialdi

# UNU-WIDER World Income Inequality Database (WIID)
data://meadow/unu_wider/2024-04-22/world_income_inequality_database:
- snapshot://unu_wider/2024-04-22/world_income_inequality_database.xlsx
data://garden/unu_wider/2024-04-22/world_income_inequality_database:
- data://meadow/unu_wider/2024-04-22/world_income_inequality_database

# Wealth inequality in France (Piketty et al. 2006)
data://meadow/chartbook/2024-05-23/wealth_france:
- snapshot://chartbook/2024-05-23/wealth_france.csv
data://garden/chartbook/2024-05-23/wealth_france:
- data://meadow/chartbook/2024-05-23/wealth_france
29 changes: 29 additions & 0 deletions etl/steps/data/garden/chartbook/2024-04-22/concialdi.meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
common:
processing_level: minor
presentation:
topic_tags:
- Economic Inequality


# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
update_period_days: 365


tables:
concialdi:
variables:
gini:
title: Gini index
unit: ""
short_unit: ""
description_short: The [Gini index](#dod:gini) measures inequality on a scale from 0 to 100. Higher values indicate higher inequality. Inequality is measured here in terms of income before taxes and after benefits.
presentation:
title_public: Gini index
display:
name: Gini index
numDecimalPlaces: 2
tolerance: 5
30 changes: 30 additions & 0 deletions etl/steps/data/garden/chartbook/2024-04-22/concialdi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Load a meadow dataset and create a garden dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Build the garden dataset for the `concialdi` step.

    Reads the `concialdi` table from the meadow dataset, formats it with
    `country` and `year` as keys, and writes the result as a garden dataset
    into `dest_dir`.
    """
    # Fetch the upstream meadow dataset and pull out its only table.
    meadow = paths.load_dataset("concialdi")
    table = meadow["concialdi"].reset_index()

    # No processing is needed beyond formatting the table by country and year.
    table = table.format(["country", "year"])

    # The garden dataset reuses the meadow dataset's metadata as its default.
    garden = create_dataset(
        dest_dir,
        tables=[table],
        check_variables_metadata=True,
        default_metadata=meadow.metadata,
    )
    garden.save()
57 changes: 57 additions & 0 deletions etl/steps/data/garden/chartbook/2024-05-23/wealth_france.meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
common:
processing_level: minor
description_key:
- Data is estimated from wealth concentration at death, based on estate tax returns.
- Data before 1902 has been estimated by sampling decedents in the Paris region.
presentation:
topic_tags:
- Economic Inequality


# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
update_period_days: 365


tables:
wealth_france:
variables:
p90p100_share:
title: Richest decile - Wealth share
unit: "%"
short_unit: "%"
description_short: The share of wealth owned by the richest decile (tenth of the population).
presentation:
title_public: Wealth share of the richest 10%
display:
name: Wealth share of the richest 10%
numDecimalPlaces: 1
tolerance: 5

p99p100_share:
title: Top 1% - Wealth share
unit: "%"
short_unit: "%"
description_short: The share of wealth owned by the richest 1%.
presentation:
title_public: Wealth share of the richest 1%
display:
name: Wealth share of the richest 1%
numDecimalPlaces: 1
tolerance: 5

p99_9p100_share:
title: Top 0.1% - Wealth share
unit: "%"
short_unit: "%"
description_short: The share of wealth owned by the richest 0.1%.
presentation:
title_public: Wealth share of the richest 0.1%
display:
name: Wealth share of the richest 0.1%
numDecimalPlaces: 1
tolerance: 5

34 changes: 34 additions & 0 deletions etl/steps/data/garden/chartbook/2024-05-23/wealth_france.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Load a meadow dataset and create a garden dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Build the garden dataset for the `wealth_france` step.

    Reads the `wealth_france` table from the meadow dataset, formats it with
    `country` and `year` as keys, and writes the result as a garden dataset
    into `dest_dir`.
    """
    # Fetch the upstream meadow dataset and pull out its only table.
    meadow = paths.load_dataset("wealth_france")
    table = meadow["wealth_france"].reset_index()

    # No processing is needed beyond formatting the table by country and year.
    table = table.format(["country", "year"])

    # The garden dataset reuses the meadow dataset's metadata as its default.
    garden = create_dataset(
        dest_dir,
        tables=[table],
        check_variables_metadata=True,
        default_metadata=meadow.metadata,
    )
    garden.save()
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
common:
presentation:
topic_tags:
- Economic Inequality


# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
update_period_days: 365


tables:
inequality_france_1999:
variables:
gini:
title: Gini coefficient
unit: ""
short_unit: ""
description_short: The [Gini coefficient](#dod:gini) measures inequality on a scale from 0 to 1. Higher values indicate higher inequality.
description_key:
- Income is ‘post-tax’ — measured after taxes have been paid and most government benefits have been received.
- Income has been equivalized – adjusted to account for the fact that people in the same household can share costs like rent and heating.
processing_level: minor
presentation:
title_public: Gini coefficient
display:
name: Gini coefficient
numDecimalPlaces: 2
tolerance: 5

32 changes: 32 additions & 0 deletions etl/steps/data/garden/insee/2024-03-21/inequality_france_1999.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Load a meadow dataset and create a garden dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Build the garden dataset for the `inequality_france_1999` step.

    Reads the `inequality_france_1999` table from the meadow dataset, formats
    it with `country` and `year` as keys, and writes the result as a garden
    dataset into `dest_dir`.
    """
    # Fetch the upstream meadow dataset and pull out its only table.
    meadow = paths.load_dataset("inequality_france_1999")
    table = meadow["inequality_france_1999"].reset_index()

    # No processing is needed beyond formatting the table by country and year.
    table = table.format(["country", "year"])

    # The garden dataset reuses the meadow dataset's metadata as its default.
    garden = create_dataset(
        dest_dir,
        tables=[table],
        check_variables_metadata=True,
        default_metadata=meadow.metadata,
    )
    garden.save()
35 changes: 35 additions & 0 deletions etl/steps/data/garden/insee/2024-04-05/inequality_france.meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
common:
processing_level: major
description_processing: |-
Each indicator is subdivided into several survey spells, each of which is comparable over time. Because the methodologies differ, the spells are not directly comparable with one another.
presentation:
topic_tags:
- Economic Inequality


# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
update_period_days: 365


tables:
inequality_france:
variables:
gini:
title: Gini coefficient - Spell <<spell>>
unit: ""
short_unit: ""
description_short: The [Gini coefficient](#dod:gini) measures inequality on a scale from 0 to 1. Higher values indicate higher inequality.
description_key:
- Income is ‘post-tax’ — measured after taxes have been paid and most government benefits have been received.
- Income has been equivalized – adjusted to account for the fact that people in the same household can share costs like rent and heating.
presentation:
title_public: Gini coefficient
display:
name: Gini coefficient
numDecimalPlaces: 2
tolerance: 5

62 changes: 62 additions & 0 deletions etl/steps/data/garden/insee/2024-04-05/inequality_france.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Load a meadow dataset and create a garden dataset."""

from owid.catalog import Table

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Build the garden dataset for the `inequality_france` step.

    Reads the `inequality_france` table from the meadow dataset, keeps only
    the Gini indicator split into spells, formats the table with `country`,
    `year` and `spell` as keys, and writes the result as a garden dataset
    into `dest_dir`.
    """
    # Fetch the upstream meadow dataset and pull out its only table.
    meadow = paths.load_dataset("inequality_france")
    raw_table = meadow["inequality_france"].reset_index()

    # Reduce the table to the Gini series, tagged by spell, then format it.
    gini_table = select_gini_and_create_spells(raw_table)
    gini_table = gini_table.format(["country", "year", "spell"])

    # The garden dataset reuses the meadow dataset's metadata as its default.
    garden = create_dataset(
        dest_dir,
        tables=[gini_table],
        check_variables_metadata=True,
        default_metadata=meadow.metadata,
    )
    garden.save()


def select_gini_and_create_spells(tb: Table) -> Table:
    """Keep the 'Indice de Gini' rows and number the spell each row belongs to.

    The input must have `indicator`, `year` (string labels) and `value`
    columns. The output replaces them with `gini`, a `year` made of the first
    four characters of the original label, a 1-based `spell` counter and a
    constant `country` column set to "France".

    A new spell starts on every row whose four-character year equals the year
    of the row just above it, so the result depends on the input row order.
    """
    # Restrict the table to the Gini indicator and renumber rows from zero.
    is_gini = tb["indicator"] == "Indice de Gini"
    tb = tb[is_gini].reset_index(drop=True)

    # The leading four characters of the raw label hold the calendar year.
    tb["year_new"] = tb["year"].str[:4]

    # A year repeated from the previous row opens a new spell. Counting those
    # repeats cumulatively (starting at 1) yields the spell number.
    repeats_previous_year = tb["year_new"] == tb["year_new"].shift(1)
    tb["spell"] = repeats_previous_year.cumsum() + 1

    # Replace the raw label with the clean year; the indicator is now constant.
    tb = tb.drop(columns=["year", "indicator"])
    tb = tb.rename(columns={"year_new": "year"})

    # All rows of this dataset refer to France.
    tb["country"] = "France"

    return tb.rename(columns={"value": "gini"})
Loading

0 comments on commit 7d0ee83

Please sign in to comment.