From 6c34ec107b407831f5cfc4d0912c66e92a8501b7 Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Thu, 20 Jun 2024 18:07:22 +0200 Subject: [PATCH 01/17] add snapshot --- .../happiness/2024-06-20/happiness_ages.py | 25 ++++++++++++++ .../2024-06-20/happiness_ages.xls.dvc | 34 +++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 snapshots/happiness/2024-06-20/happiness_ages.py create mode 100644 snapshots/happiness/2024-06-20/happiness_ages.xls.dvc diff --git a/snapshots/happiness/2024-06-20/happiness_ages.py b/snapshots/happiness/2024-06-20/happiness_ages.py new file mode 100644 index 00000000000..0c25a52c2e2 --- /dev/null +++ b/snapshots/happiness/2024-06-20/happiness_ages.py @@ -0,0 +1,25 @@ +"""Script to create a snapshot of dataset.""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +@click.option("--path-to-file", prompt=True, type=str, help="Path to local data file.") +def main(path_to_file: str, upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"happiness/{SNAPSHOT_VERSION}/happiness_ages.xls") + + # Copy local data file to snapshots data folder, add file to DVC and upload to S3. + snap.create_snapshot(filename=path_to_file, upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/happiness/2024-06-20/happiness_ages.xls.dvc b/snapshots/happiness/2024-06-20/happiness_ages.xls.dvc new file mode 100644 index 00000000000..334d582d84b --- /dev/null +++ b/snapshots/happiness/2024-06-20/happiness_ages.xls.dvc @@ -0,0 +1,34 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: World Happiness Report + description: |- + The World Happiness Report is a partnership of Gallup, the Oxford Wellbeing Research Centre, the UN Sustainable Development Solutions Network, and the WHR’s Editorial Board. + It reviews the state of happiness in the world today and shows how the science of happiness explains personal and national variations in happiness. + date_published: "2024-03-08" + version_producer: 2024 + title_snapshot: World Happiness Report - Happiness by age group + description_snapshot: |- + Happiness measured for different age groups for each country; Age groups are under 30 years old, 30-44 years old, 45-60 years old and over 60 years old. + + # Citation + producer: Wellbeing Research Centre + citation_full: |- + Helliwell, J. F., Layard, R., Sachs, J. D., De Neve, J.-E., Aknin, L. B., & Wang, S. (Eds.). (2024). World Happiness Report 2024. University of Oxford: Wellbeing Research Centre. + attribution_short: WHR + + # Files + url_main: https://worldhappiness.report/ed/2024/ + date_accessed: 2024-06-20 + + # License + license: + name: "" + url: https://worldhappiness.report/ed/2024/ + +outs: + - md5: 512ec7ed6d2c698c1474c35adedc402e + size: 705536 + path: happiness_ages.xls From 2580b2e642e053c1a788ab47149d972711068bbd Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Fri, 21 Jun 2024 11:46:41 +0200 Subject: [PATCH 02/17] add meadow, garden and grapher step --- dag/main.yml | 8 ++- .../2024-06-20/happiness_ages.countries.json | 2 + .../happiness_ages.excluded_countries.json | 2 + .../2024-06-20/happiness_ages.meta.yml | 53 +++++++++++++++++++ .../happiness/2024-06-20/happiness_ages.py | 37 +++++++++++++ .../happiness/2024-06-20/happiness_ages.py | 32 +++++++++++ .../happiness/2024-06-20/happiness_ages.py | 32 +++++++++++ .../2024-06-20/happiness_ages.xls.dvc | 2 +- 8 files changed, 165 insertions(+), 3 deletions(-) create mode 100644 etl/steps/data/garden/happiness/2024-06-20/happiness_ages.countries.json create mode 100644 etl/steps/data/garden/happiness/2024-06-20/happiness_ages.excluded_countries.json create mode 100644 etl/steps/data/garden/happiness/2024-06-20/happiness_ages.meta.yml create mode 100644 etl/steps/data/garden/happiness/2024-06-20/happiness_ages.py create mode 100644 etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py create mode 100644 etl/steps/data/meadow/happiness/2024-06-20/happiness_ages.py diff --git a/dag/main.yml b/dag/main.yml index 185a9041166..fbc5d57d941 100644 --- a/dag/main.yml +++ b/dag/main.yml @@ -278,8 +278,12 @@ steps: data://grapher/happiness/2024-06-09/happiness: - data://garden/happiness/2024-06-09/happiness - - # LGBTI Policy Index (Velasco, 2020) + data://meadow/happiness/2024-06-20/happiness_ages: + - snapshot://happiness/2024-06-20/happiness_ages.xls + data://garden/happiness/2024-06-20/happiness_ages: + - data://meadow/happiness/2024-06-20/happiness_ages + data://grapher/happiness/2024-06-20/happiness_ages: + - data://garden/happiness/2024-06-20/happiness_ages data://meadow/lgbt_rights/2023-04-27/lgbti_policy_index: - snapshot://lgbt_rights/2023-04-27/lgbti_policy_index.xlsx data://garden/lgbt_rights/2023-04-27/lgbti_policy_index: diff --git a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.countries.json b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.countries.json new file mode 100644 index 00000000000..2c63c085104 --- /dev/null +++ b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.countries.json @@ -0,0 +1,2 @@ +{ +} diff --git a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.excluded_countries.json b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.excluded_countries.json new file mode 100644 index 00000000000..0d4f101c7a3 --- /dev/null +++ b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.excluded_countries.json @@ -0,0 +1,2 @@ +[ +] diff --git a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.meta.yml b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.meta.yml new file mode 100644 index 00000000000..44a4cfdd7a8 --- /dev/null +++ b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.meta.yml @@ -0,0 +1,53 @@ +# NOTE: To learn more about the fields, hover over their names. +definitions: + common: + presentation: + topic_tags: + - Happiness & Life Satisfaction + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + +tables: + happiness_ages: + variables: + # testing_variable: + # title: Testing variable title + # unit: arbitrary units + # short_unit: au + # description_short: Short description of testing variable. + # description_processing: Description of processing of testing variable. + # description_key: List of key points about the indicator. + # description_from_producer: Description of testing variable from producer. + # processing_level: minor + # presentation: + # attribution: + # attribution_short: + # faqs: + # grapher_config: + # title_public: + # title_variant: + # topic_tags: + # display: + # color: + # conversionFactor: 1 + # description: + # entityAnnotationsMap: Test annotation + # includeInTable: + # isProjection: false + # name: Testing variable + # numDecimalPlaces: 0 + # shortUnit: au + # tableDisplay: + # hideAbsoluteChange: + # hideRelativeChange: + # tolerance: 0 + # unit: arbitrary units + # yearIsDay: false + # zeroDay: + {} + diff --git a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.py b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.py new file mode 100644 index 00000000000..595e48dc11b --- /dev/null +++ b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.py @@ -0,0 +1,37 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("happiness_ages") + + # Read table from meadow dataset. + tb = ds_meadow["happiness_ages"].reset_index() + + # + # Process data. + # + tb = geo.harmonize_countries( + df=tb, countries_file=paths.country_mapping_path, excluded_countries_file=paths.excluded_countries_path + ) + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. + ds_garden.save() diff --git a/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py b/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py new file mode 100644 index 00000000000..f20b4fd3607 --- /dev/null +++ b/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py @@ -0,0 +1,32 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("happiness_ages") + + # Read table from garden dataset. + tb = ds_garden["happiness_ages"] + + # + # Process data. + # + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save() diff --git a/etl/steps/data/meadow/happiness/2024-06-20/happiness_ages.py b/etl/steps/data/meadow/happiness/2024-06-20/happiness_ages.py new file mode 100644 index 00000000000..07940753642 --- /dev/null +++ b/etl/steps/data/meadow/happiness/2024-06-20/happiness_ages.py @@ -0,0 +1,32 @@ +"""Load a snapshot and create a meadow dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("happiness_ages.xls") + + # Load data from snapshot. + tb = snap.read() + + # + # Process data. + # + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. + ds_meadow.save() diff --git a/snapshots/happiness/2024-06-20/happiness_ages.xls.dvc b/snapshots/happiness/2024-06-20/happiness_ages.xls.dvc index 334d582d84b..68d1e6bf9da 100644 --- a/snapshots/happiness/2024-06-20/happiness_ages.xls.dvc +++ b/snapshots/happiness/2024-06-20/happiness_ages.xls.dvc @@ -11,7 +11,7 @@ meta: version_producer: 2024 title_snapshot: World Happiness Report - Happiness by age group description_snapshot: |- - Happiness measured for different age groups for each country; Age groups are under 30 years old, 30-44 years old, 45-60 years old and over 60 years old. + Happiness measured for different age groups for each country; Age groups are under 30 years old, 30-44 years old, 45-59 years old and 60 years old and above. # Citation producer: Wellbeing Research Centre From a60f65856c6151a1afa36612f0b12669811a708a Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Fri, 21 Jun 2024 13:59:02 +0200 Subject: [PATCH 03/17] recover garden and meadow steps --- .../happiness/2024-06-20/happiness_ages.py | 50 +++++++++++++++++-- .../happiness/2024-06-20/happiness_ages.py | 22 ++++++-- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.py b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.py index 595e48dc11b..7830e9a6229 100644 --- a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.py +++ b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.py @@ -1,11 +1,12 @@ -"""Load a meadow dataset and create a garden dataset.""" - from etl.data_helpers import geo from etl.helpers import PathFinder, create_dataset # Get paths and naming conventions for current step. paths = PathFinder(__file__) +REGIONS = {reg: reg_dict for reg, reg_dict in geo.REGIONS.items() if reg != "European Union (27)"} +REGIONS.update({"World": {}}) + def run(dest_dir: str) -> None: # @@ -14,16 +15,55 @@ def run(dest_dir: str) -> None: # Load meadow dataset. ds_meadow = paths.load_dataset("happiness_ages") + # load datasets for aggregation + ds_population = paths.load_dataset("population", channel="garden") + ds_regions = paths.load_dataset("regions") + ds_income_groups = paths.load_dataset("income_groups") + # Read table from meadow dataset. tb = ds_meadow["happiness_ages"].reset_index() # # Process data. # - tb = geo.harmonize_countries( - df=tb, countries_file=paths.country_mapping_path, excluded_countries_file=paths.excluded_countries_path + # drop unneeded columns: + tb = tb.drop( + columns=[ + "region", + "age_group_code", + "stress_score", + "worry_score", + "happiness_count", + "stress_count", + "worry_count", + ] ) - tb = tb.format(["country", "year"]) + + # remove leading "Age " from age_group + tb["age_group"] = tb["age_group"].str.replace("Age ", "") + + # add regional aggregates + tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) + + tb = geo.add_population_to_table(tb, ds_population) + + tb["happiness_times_pop"] = tb["happiness_score"] * tb["population"] + + aggr_score = {"happiness_times_pop": "sum", "population": "sum"} + tb = geo.add_regions_to_table( + tb, + aggregations=aggr_score, + regions=REGIONS, + ds_regions=ds_regions, + ds_income_groups=ds_income_groups, + index_columns=["country", "year", "age_group"], + min_num_values_per_year=1, + ) + tb["happiness_score"] = tb["happiness_times_pop"] / tb["population"] + + tb = tb.drop(columns=["happiness_times_pop"]) + + tb = tb.format(["country", "year", "age_group"]) # # Save outputs. diff --git a/etl/steps/data/meadow/happiness/2024-06-20/happiness_ages.py b/etl/steps/data/meadow/happiness/2024-06-20/happiness_ages.py index 07940753642..43a2684c342 100644 --- a/etl/steps/data/meadow/happiness/2024-06-20/happiness_ages.py +++ b/etl/steps/data/meadow/happiness/2024-06-20/happiness_ages.py @@ -5,6 +5,20 @@ # Get paths and naming conventions for current step. paths = PathFinder(__file__) +COLUMN_MAPPING = { + "Country name": "country", + "year": "year", + "Region indicator": "region", + "Age group code": "age_group_code", + "Age group": "age_group", + "Mean of ladder": "happiness_score", + "Mean of stress": "stress_score", + "Mean of worry": "worry_score", + "Count of ladder": "happiness_count", + "Count of stress": "stress_count", + "Count of worry": "worry_count", +} + def run(dest_dir: str) -> None: # @@ -16,11 +30,11 @@ def run(dest_dir: str) -> None: # Load data from snapshot. tb = snap.read() - # - # Process data. - # + # rename columns + tb = tb.rename(columns=COLUMN_MAPPING, errors="raise") + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. - tb = tb.format(["country", "year"]) + tb = tb.format(["country", "year", "age_group"]) # # Save outputs. From feb2692ca6dfb5a8eda67aeac7a70aef4f296ef4 Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Fri, 21 Jun 2024 13:59:44 +0200 Subject: [PATCH 04/17] recover dag --- dag/main.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dag/main.yml b/dag/main.yml index fbc5d57d941..b6206864570 100644 --- a/dag/main.yml +++ b/dag/main.yml @@ -282,8 +282,14 @@ steps: - snapshot://happiness/2024-06-20/happiness_ages.xls data://garden/happiness/2024-06-20/happiness_ages: - data://meadow/happiness/2024-06-20/happiness_ages + - data://garden/demography/2023-03-31/population + - data://garden/regions/2023-01-01/regions + - data://garden/wb/2024-03-11/income_groups data://grapher/happiness/2024-06-20/happiness_ages: - data://garden/happiness/2024-06-20/happiness_ages + + + # LGBTI Policy Index (Velasco, 2020) data://meadow/lgbt_rights/2023-04-27/lgbti_policy_index: - snapshot://lgbt_rights/2023-04-27/lgbti_policy_index.xlsx data://garden/lgbt_rights/2023-04-27/lgbti_policy_index: From ff09c9aa4bf0f6980fec7883bddbdce39bf51ae5 Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Fri, 21 Jun 2024 14:22:53 +0200 Subject: [PATCH 05/17] country harmonization --- .../2024-06-20/happiness_ages.countries.json | 62 ++++++++++++++++++- .../happiness_ages.excluded_countries.json | 2 - 2 files changed, 61 insertions(+), 3 deletions(-) delete mode 100644 etl/steps/data/garden/happiness/2024-06-20/happiness_ages.excluded_countries.json diff --git a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.countries.json b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.countries.json index 2c63c085104..4989900e05e 100644 --- a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.countries.json +++ b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.countries.json @@ -1,2 +1,62 @@ { -} + "Albania": "Albania", + "Argentina": "Argentina", + "Australia": "Australia", + "Austria": "Austria", + "Bangladesh": "Bangladesh", + "Belgium": "Belgium", + "Brazil": "Brazil", + "Bulgaria": "Bulgaria", + "Canada": "Canada", + "Chile": "Chile", + "China": "China", + "Croatia": "Croatia", + "Cyprus": "Cyprus", + "Czechia": "Czechia", + "Denmark": "Denmark", + "Egypt": "Egypt", + "Estonia": "Estonia", + "Finland": "Finland", + "France": "France", + "Germany": "Germany", + "Ghana": "Ghana", + "Greece": "Greece", + "Hungary": "Hungary", + "Iceland": "Iceland", + "India": "India", + "Indonesia": "Indonesia", + "Ireland": "Ireland", + "Italy": "Italy", + "Japan": "Japan", + "Jordan": "Jordan", + "Kenya": "Kenya", + "Latvia": "Latvia", + "Lithuania": "Lithuania", + "Luxembourg": "Luxembourg", + "Mexico": "Mexico", + "Netherlands": "Netherlands", + "New Zealand": "New Zealand", + "Nigeria": "Nigeria", + "Norway": "Norway", + "Pakistan": "Pakistan", + "Philippines": "Philippines", + "Poland": "Poland", + "Portugal": "Portugal", + "Romania": "Romania", + "Saudi Arabia": "Saudi Arabia", + "Serbia": "Serbia", + "Singapore": "Singapore", + "Slovakia": "Slovakia", + "Slovenia": "Slovenia", + "South Africa": "South Africa", + "Spain": "Spain", + "Sweden": "Sweden", + "Switzerland": "Switzerland", + "Thailand": "Thailand", + "United Arab Emirates": "United Arab Emirates", + "United Kingdom": "United Kingdom", + "United States": "United States", + "Vietnam": "Vietnam", + "Hong Kong S.A.R. of China": "Hong Kong", + "Taiwan Province of China": "Taiwan" +} \ No newline at end of file diff --git a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.excluded_countries.json b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.excluded_countries.json deleted file mode 100644 index 0d4f101c7a3..00000000000 --- a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.excluded_countries.json +++ /dev/null @@ -1,2 +0,0 @@ -[ -] From 796dfed6cba3ea00e406c0858294475bfe085d5b Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Fri, 21 Jun 2024 14:48:59 +0200 Subject: [PATCH 06/17] trigger build From bffd82df086879ce825207be65ab31c569438e4e Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Fri, 21 Jun 2024 15:59:08 +0200 Subject: [PATCH 07/17] pivot table for grapher & add origins --- .../2024-06-20/happiness_ages.meta.yml | 53 ---------------- .../2024-06-20/happiness_ages.meta.yml | 62 +++++++++++++++++++ .../happiness/2024-06-20/happiness_ages.py | 42 ++++++++++++- 3 files changed, 102 insertions(+), 55 deletions(-) delete mode 100644 etl/steps/data/garden/happiness/2024-06-20/happiness_ages.meta.yml create mode 100644 etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.meta.yml diff --git a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.meta.yml b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.meta.yml deleted file mode 100644 index 44a4cfdd7a8..00000000000 --- a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.meta.yml +++ /dev/null @@ -1,53 +0,0 @@ -# NOTE: To learn more about the fields, hover over their names. -definitions: - common: - presentation: - topic_tags: - - Happiness & Life Satisfaction - - -# Learn more about the available fields: -# http://docs.owid.io/projects/etl/architecture/metadata/reference/ -dataset: - update_period_days: 365 - - -tables: - happiness_ages: - variables: - # testing_variable: - # title: Testing variable title - # unit: arbitrary units - # short_unit: au - # description_short: Short description of testing variable. - # description_processing: Description of processing of testing variable. - # description_key: List of key points about the indicator. - # description_from_producer: Description of testing variable from producer. - # processing_level: minor - # presentation: - # attribution: - # attribution_short: - # faqs: - # grapher_config: - # title_public: - # title_variant: - # topic_tags: - # display: - # color: - # conversionFactor: 1 - # description: - # entityAnnotationsMap: Test annotation - # includeInTable: - # isProjection: false - # name: Testing variable - # numDecimalPlaces: 0 - # shortUnit: au - # tableDisplay: - # hideAbsoluteChange: - # hideRelativeChange: - # tolerance: 0 - # unit: arbitrary units - # yearIsDay: false - # zeroDay: - {} - diff --git a/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.meta.yml b/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.meta.yml new file mode 100644 index 00000000000..df858009baa --- /dev/null +++ b/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.meta.yml @@ -0,0 +1,62 @@ +# NOTE: To learn more about the fields, hover over their names. +definitions: + common: + presentation: + topic_tags: + - Happiness & Life Satisfaction + attribution_short: WHR + display: + numDecimalPlaces: 2 + processing_level: major + origins: + # Data product / Snapshot + - title: World Happiness Report + description: |- + The World Happiness Report is a partnership of Gallup, the Oxford Wellbeing Research Centre, the UN Sustainable Development Solutions Network, and the WHR’s Editorial Board. + It reviews the state of happiness in the world today and shows how the science of happiness explains personal and national variations in happiness. + date_published: "2024-03-08" + version_producer: 2024 + # Citation + producer: Wellbeing Research Centre + citation_full: |- + Helliwell, J. F., Layard, R., Sachs, J. D., De Neve, J.-E., Aknin, L. B., & Wang, S. (Eds.). (2024). World Happiness Report 2024. University of Oxford: Wellbeing Research Centre. + attribution_short: WHR + # Files + url_main: https://worldhappiness.report/ed/2024/ + date_accessed: 2024-06-20 + + # License + license: + name: "" + url: https://worldhappiness.report/ed/2024/ + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + +tables: + happiness_ages: + variables: + happiness_below_30: + title: Life satisfaction of people below 30 + unit: "" + short_unit: "" + description_short: "." + happiness_30_to_44: + title: Life satisfaction of people aged 30-44 + unit: "" + short_unit: "" + description_short: "." + happiness_45_to_59: + title: Life satisfaction of people aged 45-59 + unit: "" + short_unit: "" + description_short: "." + happiness_60_and_above: + title: Life satisfaction of people aged 60 and above + unit: "" + short_unit: "" + description_short: "." \ No newline at end of file diff --git a/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py b/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py index f20b4fd3607..e3fe860dfa2 100644 --- a/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py +++ b/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py @@ -1,11 +1,44 @@ """Load a garden dataset and create a grapher dataset.""" -from etl.helpers import PathFinder, create_dataset +import pandas as pd + +from etl.helpers import PathFinder, Table, create_dataset # Get paths and naming conventions for current step. paths = PathFinder(__file__) +def pivot_age_groups(tb): + new_tb_rows = [] + for cty in tb["country"].unique(): + cty_tb = tb[tb["country"] == cty] + for year in cty_tb["year"].unique(): + new_row_dict = {"country": cty, "year": year} + row_tb = cty_tb[cty_tb["year"] == year] + new_row_dict["happiness_below_30"] = row_tb[row_tb["age_group"] == "below 30"]["happiness_score"].values[0] + new_row_dict["happiness_30_to_44"] = row_tb[row_tb["age_group"] == "30-44"]["happiness_score"].values[0] + new_row_dict["happiness_45_to_59"] = row_tb[row_tb["age_group"] == "45-59"]["happiness_score"].values[0] + new_row_dict["happiness_60_and_above"] = row_tb[row_tb["age_group"] == "60 and above"][ + "happiness_score" + ].values[0] + new_tb_rows.append(new_row_dict) + tb_pivot = Table( + pd.DataFrame( + new_tb_rows, + columns=[ + "country", + "year", + "happiness_below_30", + "happiness_30_to_44", + "happiness_45_to_59", + "happiness_60_and_above", + ], + ) + ) + tb_pivot = tb_pivot.copy_metadata(tb) + return tb_pivot + + def run(dest_dir: str) -> None: # # Load inputs. @@ -14,11 +47,16 @@ def run(dest_dir: str) -> None: ds_garden = paths.load_dataset("happiness_ages") # Read table from garden dataset. - tb = ds_garden["happiness_ages"] + tb = ds_garden["happiness_ages"].reset_index() # # Process data. # + tb = tb.drop(columns=["population"]) + + tb = pivot_age_groups(tb) + + tb = tb.format(["country", "year"]) # # Save outputs. From 80f344240426df12e1f4a086b62604aa0f36a039 Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Fri, 21 Jun 2024 16:05:58 +0200 Subject: [PATCH 08/17] fix build --- etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py b/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py index e3fe860dfa2..29405fe4a66 100644 --- a/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py +++ b/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py @@ -1,8 +1,9 @@ """Load a garden dataset and create a grapher dataset.""" import pandas as pd +from owid.catalog import Table -from etl.helpers import PathFinder, Table, create_dataset +from etl.helpers import PathFinder, create_dataset # Get paths and naming conventions for current step. paths = PathFinder(__file__) From 20a81b3a47cb63f982ddc930c68a21c859f5ffdd Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Mon, 24 Jun 2024 14:54:08 +0200 Subject: [PATCH 09/17] move happiness by age group data into happiness dataset --- dag/main.yml | 8 +- .../happiness/2024-06-09/happiness.meta.yml | 2 +- .../garden/happiness/2024-06-09/happiness.py | 40 ++++++++-- .../2024-06-20/happiness_ages.countries.json | 62 --------------- .../happiness/2024-06-20/happiness_ages.py | 77 ------------------- .../happiness.meta.yml} | 30 ++++++-- .../grapher/happiness/2024-06-09/happiness.py | 56 +++++++++++++- .../happiness/2024-06-20/happiness_ages.py | 71 ----------------- 8 files changed, 116 insertions(+), 230 deletions(-) delete mode 100644 etl/steps/data/garden/happiness/2024-06-20/happiness_ages.countries.json delete mode 100644 etl/steps/data/garden/happiness/2024-06-20/happiness_ages.py rename etl/steps/data/grapher/happiness/{2024-06-20/happiness_ages.meta.yml => 2024-06-09/happiness.meta.yml} (50%) delete mode 100644 etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py diff --git a/dag/main.yml b/dag/main.yml index b6206864570..5bc3262f15f 100644 --- a/dag/main.yml +++ b/dag/main.yml @@ -271,6 +271,7 @@ steps: data://garden/happiness/2024-06-09/happiness: - data://meadow/happiness/2024-06-09/happiness + - data://meadow/happiness/2024-06-20/happiness_ages - data://garden/happiness/2023-03-20/happiness - data://garden/demography/2023-03-31/population - data://garden/regions/2023-01-01/regions @@ -280,13 +281,6 @@ steps: data://meadow/happiness/2024-06-20/happiness_ages: - snapshot://happiness/2024-06-20/happiness_ages.xls - data://garden/happiness/2024-06-20/happiness_ages: - - data://meadow/happiness/2024-06-20/happiness_ages - - data://garden/demography/2023-03-31/population - - data://garden/regions/2023-01-01/regions - - data://garden/wb/2024-03-11/income_groups - data://grapher/happiness/2024-06-20/happiness_ages: - - data://garden/happiness/2024-06-20/happiness_ages # LGBTI Policy Index (Velasco, 2020) diff --git a/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml b/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml index 31e6c6f43ef..91499d7179f 100644 --- a/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml +++ b/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml @@ -13,7 +13,7 @@ dataset: tables: happiness: variables: - cantril_ladder_score: + happiness_score: title: Cantril ladder score unit: "" short_unit: "" diff --git a/etl/steps/data/garden/happiness/2024-06-09/happiness.py b/etl/steps/data/garden/happiness/2024-06-09/happiness.py index 3edd2c6b15e..37e919ae2b3 100644 --- a/etl/steps/data/garden/happiness/2024-06-09/happiness.py +++ b/etl/steps/data/garden/happiness/2024-06-09/happiness.py @@ -22,6 +22,8 @@ def run(dest_dir: str) -> None: ds_prev_years = paths.load_dataset("happiness", channel="garden", version="2023-03-20") ds_population = paths.load_dataset("population", channel="garden") + ds_happiness_ages = paths.load_dataset("happiness_ages") + # Load regions dataset. ds_regions = paths.load_dataset("regions") @@ -37,8 +39,35 @@ def run(dest_dir: str) -> None: cols_overlap = ["country", "cantril_ladder_score", "year"] tb = pr.concat([tb_this_year[cols_overlap], tb_prev_years], ignore_index=True) + tb_ages = ds_happiness_ages["happiness_ages"].reset_index() + # Harmonize country names tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) + tb_ages = geo.harmonize_countries( + df=tb_ages, countries_file=paths.country_mapping_path, warn_on_missing_countries=False + ) + + # drop unneeded columns from age table + tb_ages = tb_ages.drop( + columns=[ + "region", + "age_group_code", + "stress_score", + "worry_score", + "happiness_count", + "stress_count", + "worry_count", + ] + ) + + # remove leading "Age " from age_group + tb_ages["age_group"] = tb_ages["age_group"].str.replace("Age ", "") + + # Concatenate table happiness data with table happiness data by age group + tb["age_group"] = "all ages" + tb["happiness_score"] = tb["cantril_ladder_score"] + tb = tb.drop(columns=["cantril_ladder_score"]) + tb = pr.concat([tb, tb_ages], ignore_index=True) # Process data (add population weighted averages for continents & income groups) @@ -52,28 +81,29 @@ def run(dest_dir: str) -> None: # calculate population weighted averages by multiplying the population with the cantril ladder score # and then summing and dividing by the total population - tb["cantril_times_pop"] = tb["cantril_ladder_score"] * tb["population"] + tb["happiness_times_pop"] = tb["happiness_score"] * tb["population"] - aggr_score = {"cantril_times_pop": "sum", "population": "sum"} + aggr_score = {"happiness_times_pop": "sum", "population": "sum"} tb = geo.add_regions_to_table( tb, aggregations=aggr_score, regions=REGIONS, ds_regions=ds_regions, ds_income_groups=ds_income_groups, + index_columns=["country", "year", "age_group"], min_num_values_per_year=1, ) # Divide the sum of the cantril ladder score times population by the total population - tb["cantril_ladder_score"] = tb["cantril_times_pop"] / tb["population"] + tb["happiness_score"] = tb["happiness_times_pop"] / tb["population"] # drop unneeded columns - tb = tb.drop(columns=["cantril_times_pop"]) + tb = tb.drop(columns=["happiness_times_pop"]) # add back Northern Cyprus and Somaliland tb = pr.concat([tb, tb_countries_wo_population], ignore_index=True) - tb = tb.format(["country", "year"]) + tb = tb.format(["country", "year", "age_group"]) # Save outputs. # diff --git a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.countries.json b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.countries.json deleted file mode 100644 index 4989900e05e..00000000000 --- a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.countries.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "Albania": "Albania", - "Argentina": "Argentina", - "Australia": "Australia", - "Austria": "Austria", - "Bangladesh": "Bangladesh", - "Belgium": "Belgium", - "Brazil": "Brazil", - "Bulgaria": "Bulgaria", - "Canada": "Canada", - "Chile": "Chile", - "China": "China", - "Croatia": "Croatia", - "Cyprus": "Cyprus", - "Czechia": "Czechia", - "Denmark": "Denmark", - "Egypt": "Egypt", - "Estonia": "Estonia", - "Finland": "Finland", - "France": "France", - "Germany": "Germany", - "Ghana": "Ghana", - "Greece": "Greece", - "Hungary": "Hungary", - "Iceland": "Iceland", - "India": "India", - "Indonesia": "Indonesia", - "Ireland": "Ireland", - "Italy": "Italy", - "Japan": "Japan", - "Jordan": "Jordan", - "Kenya": "Kenya", - "Latvia": "Latvia", - "Lithuania": "Lithuania", - "Luxembourg": "Luxembourg", - "Mexico": "Mexico", - "Netherlands": "Netherlands", - "New Zealand": "New Zealand", - "Nigeria": "Nigeria", - "Norway": "Norway", - "Pakistan": "Pakistan", - "Philippines": "Philippines", - "Poland": "Poland", - "Portugal": "Portugal", - "Romania": "Romania", - "Saudi Arabia": "Saudi Arabia", - "Serbia": "Serbia", - "Singapore": "Singapore", - "Slovakia": "Slovakia", - "Slovenia": "Slovenia", - "South Africa": "South Africa", - "Spain": "Spain", - "Sweden": "Sweden", - "Switzerland": "Switzerland", - "Thailand": "Thailand", - "United Arab Emirates": "United Arab Emirates", - "United Kingdom": "United Kingdom", - "United States": "United States", - "Vietnam": "Vietnam", - "Hong Kong S.A.R. of China": "Hong Kong", - "Taiwan Province of China": "Taiwan" -} \ No newline at end of file diff --git a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.py b/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.py deleted file mode 100644 index 7830e9a6229..00000000000 --- a/etl/steps/data/garden/happiness/2024-06-20/happiness_ages.py +++ /dev/null @@ -1,77 +0,0 @@ -from etl.data_helpers import geo -from etl.helpers import PathFinder, create_dataset - -# Get paths and naming conventions for current step. -paths = PathFinder(__file__) - -REGIONS = {reg: reg_dict for reg, reg_dict in geo.REGIONS.items() if reg != "European Union (27)"} -REGIONS.update({"World": {}}) - - -def run(dest_dir: str) -> None: - # - # Load inputs. - # - # Load meadow dataset. - ds_meadow = paths.load_dataset("happiness_ages") - - # load datasets for aggregation - ds_population = paths.load_dataset("population", channel="garden") - ds_regions = paths.load_dataset("regions") - ds_income_groups = paths.load_dataset("income_groups") - - # Read table from meadow dataset. - tb = ds_meadow["happiness_ages"].reset_index() - - # - # Process data. - # - # drop unneeded columns: - tb = tb.drop( - columns=[ - "region", - "age_group_code", - "stress_score", - "worry_score", - "happiness_count", - "stress_count", - "worry_count", - ] - ) - - # remove leading "Age " from age_group - tb["age_group"] = tb["age_group"].str.replace("Age ", "") - - # add regional aggregates - tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) - - tb = geo.add_population_to_table(tb, ds_population) - - tb["happiness_times_pop"] = tb["happiness_score"] * tb["population"] - - aggr_score = {"happiness_times_pop": "sum", "population": "sum"} - tb = geo.add_regions_to_table( - tb, - aggregations=aggr_score, - regions=REGIONS, - ds_regions=ds_regions, - ds_income_groups=ds_income_groups, - index_columns=["country", "year", "age_group"], - min_num_values_per_year=1, - ) - tb["happiness_score"] = tb["happiness_times_pop"] / tb["population"] - - tb = tb.drop(columns=["happiness_times_pop"]) - - tb = tb.format(["country", "year", "age_group"]) - - # - # Save outputs. - # - # Create a new garden dataset with the same metadata as the meadow dataset. - ds_garden = create_dataset( - dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata - ) - - # Save changes in the new garden dataset. - ds_garden.save() diff --git a/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.meta.yml b/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml similarity index 50% rename from etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.meta.yml rename to etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml index df858009baa..0932c4c4b5c 100644 --- a/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.meta.yml +++ b/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml @@ -24,11 +24,20 @@ definitions: # Files url_main: https://worldhappiness.report/ed/2024/ date_accessed: 2024-06-20 - - # License + # License license: name: "" url: https://worldhappiness.report/ed/2024/ + description_processing: Average of regions is calculated by taking a population-weighted average over all countries within that region. + description_key: + - The Cantril ladder asks respondents to think of a ladder, with the best possible life for them being a 10 and the worst possible life being a 0. They are then asked to rate their own current lives on that 0 to 10 scale. + - The rankings are calculated by the source based on nationally representative samples for the three years prior to the year of the report, so that data for the 2024 report will draw from survey data from 2021-2023. We show the data for final year of the three-year survey period, i.e. we show the 2021-2023 survey data as 2023. + - The only exception is the data for the 2012 report, which uses survey data from 2005-2011, we show this data as the final year of the survey data - 2011. + - The number of people and countries surveyed varies year to year, but typically more than 100,000 people in 130 countries participate in the Gallup World Poll each year. + - The rankings are based entirely on the survey scores, using the Gallup weights to make the estimates representative. + - The data is the compilation of all previous World Happiness Reports, which can be found at https://worldhappiness.report/archive/. + + # Learn more about the available fields: @@ -38,13 +47,13 @@ dataset: tables: - happiness_ages: + happiness: variables: happiness_below_30: title: Life satisfaction of people below 30 unit: "" short_unit: "" - description_short: "." + description_short: "" happiness_30_to_44: title: Life satisfaction of people aged 30-44 unit: "" @@ -59,4 +68,15 @@ tables: title: Life satisfaction of people aged 60 and above unit: "" short_unit: "" - description_short: "." \ No newline at end of file + description_short: "." + happiness_all_ages: + title: Cantril ladder score + unit: "" + short_unit: "" + description_short: Average of survey responses to the 'Cantril Ladder' question in the Gallup World Poll. The survey question asks respondents to think of a ladder, with the best possible life for them being a 10, and the worst possible life being a 0. + presentation: + attribution: World Happiness Report (2012-2024) + attribution_short: WHR + title_public: Self-reported life satisfaction + display: + numDecimalPlaces: 2 diff --git a/etl/steps/data/grapher/happiness/2024-06-09/happiness.py b/etl/steps/data/grapher/happiness/2024-06-09/happiness.py index e2afb7700ca..289205377f9 100644 --- a/etl/steps/data/grapher/happiness/2024-06-09/happiness.py +++ b/etl/steps/data/grapher/happiness/2024-06-09/happiness.py @@ -1,10 +1,51 @@ """Load a garden dataset and create a grapher dataset.""" +import pandas as pd +from owid.catalog import Table from etl.helpers import PathFinder, create_dataset # Get paths and naming conventions for current step. paths = PathFinder(__file__) +AGES_COLUMNS = [ + "happiness_below_30", + "happiness_30_to_44", + "happiness_45_to_59", + "happiness_60_and_above", + "happiness_all_ages", +] + + +def pivot_age_groups(tb: Table): + new_tb_rows = [] + for cty in tb["country"].unique(): + cty_tb = tb[tb["country"] == cty] + for year in cty_tb["year"].unique(): + new_row_dict = {"country": cty, "year": year} + row_tb = cty_tb[cty_tb["year"] == year] + ages = ["below 30", "30-44", "45-59", "60 and above", "all ages"] + for idx in range(len(ages)): + age_entry = ages[idx] + age_column = AGES_COLUMNS[idx] + age_row = row_tb[row_tb["age_group"] == age_entry] + if len(age_row) == 0: + new_row_dict[age_column] = pd.NA + else: + new_row_dict[age_column] = age_row["happiness_score"].values[0] + new_tb_rows.append(new_row_dict) + tb_pivot = Table( + pd.DataFrame( + new_tb_rows, + columns=[ + "country", + "year", + ] + + AGES_COLUMNS, + ) + ) + tb_pivot = tb_pivot.copy_metadata(tb) + return tb_pivot + def run(dest_dir: str) -> None: # @@ -14,13 +55,24 @@ def run(dest_dir: str) -> None: ds_garden = paths.load_dataset("happiness") # Read table from garden dataset. - tb = ds_garden["happiness"] + tb = ds_garden["happiness"].reset_index() + + # pivot table + tb = tb.drop(columns=["population"]) + + tb = pivot_age_groups(tb) + + for age_col in AGES_COLUMNS: + tb[age_col] = tb[age_col].astype("Float64") + + tb = tb.format(["country", "year"]) # Save outputs. # # Create a new grapher dataset with the same metadata as the garden dataset. + # origins get added in grapher dataset, so do not warn about missing origins. ds_grapher = create_dataset( - dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + dest_dir, tables=[tb], check_variables_metadata=False, default_metadata=ds_garden.metadata ) # Save changes in the new grapher dataset. diff --git a/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py b/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py deleted file mode 100644 index 29405fe4a66..00000000000 --- a/etl/steps/data/grapher/happiness/2024-06-20/happiness_ages.py +++ /dev/null @@ -1,71 +0,0 @@ -"""Load a garden dataset and create a grapher dataset.""" - -import pandas as pd -from owid.catalog import Table - -from etl.helpers import PathFinder, create_dataset - -# Get paths and naming conventions for current step. -paths = PathFinder(__file__) - - -def pivot_age_groups(tb): - new_tb_rows = [] - for cty in tb["country"].unique(): - cty_tb = tb[tb["country"] == cty] - for year in cty_tb["year"].unique(): - new_row_dict = {"country": cty, "year": year} - row_tb = cty_tb[cty_tb["year"] == year] - new_row_dict["happiness_below_30"] = row_tb[row_tb["age_group"] == "below 30"]["happiness_score"].values[0] - new_row_dict["happiness_30_to_44"] = row_tb[row_tb["age_group"] == "30-44"]["happiness_score"].values[0] - new_row_dict["happiness_45_to_59"] = row_tb[row_tb["age_group"] == "45-59"]["happiness_score"].values[0] - new_row_dict["happiness_60_and_above"] = row_tb[row_tb["age_group"] == "60 and above"][ - "happiness_score" - ].values[0] - new_tb_rows.append(new_row_dict) - tb_pivot = Table( - pd.DataFrame( - new_tb_rows, - columns=[ - "country", - "year", - "happiness_below_30", - "happiness_30_to_44", - "happiness_45_to_59", - "happiness_60_and_above", - ], - ) - ) - tb_pivot = tb_pivot.copy_metadata(tb) - return tb_pivot - - -def run(dest_dir: str) -> None: - # - # Load inputs. - # - # Load garden dataset. - ds_garden = paths.load_dataset("happiness_ages") - - # Read table from garden dataset. - tb = ds_garden["happiness_ages"].reset_index() - - # - # Process data. - # - tb = tb.drop(columns=["population"]) - - tb = pivot_age_groups(tb) - - tb = tb.format(["country", "year"]) - - # - # Save outputs. - # - # Create a new grapher dataset with the same metadata as the garden dataset. - ds_grapher = create_dataset( - dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata - ) - - # Save changes in the new grapher dataset. - ds_grapher.save() From 4a7d80d8d04ef3a69275caae1c252d9f0267ca0f Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Tue, 25 Jun 2024 18:12:46 +0100 Subject: [PATCH 10/17] improve descriptions --- .../happiness/2024-06-09/happiness.meta.yml | 32 +++++++------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml b/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml index 0932c4c4b5c..5db5370e759 100644 --- a/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml +++ b/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml @@ -8,6 +8,8 @@ definitions: display: numDecimalPlaces: 2 processing_level: major + unit: "" + short_unit: "" origins: # Data product / Snapshot - title: World Happiness Report @@ -28,14 +30,18 @@ definitions: license: name: "" url: https://worldhappiness.report/ed/2024/ + description_short: + "Self-reported life satisfaction is the answer to the question: + 'Imagine a ladder with the best possible life being a 10, and the worst possible life being a 0. + Which step on that ladder would you say you stand on right now?'" description_processing: Average of regions is calculated by taking a population-weighted average over all countries within that region. description_key: - - The Cantril ladder asks respondents to think of a ladder, with the best possible life for them being a 10 and the worst possible life being a 0. They are then asked to rate their own current lives on that 0 to 10 scale. - - The rankings are calculated by the source based on nationally representative samples for the three years prior to the year of the report, so that data for the 2024 report will draw from survey data from 2021-2023. We show the data for final year of the three-year survey period, i.e. we show the 2021-2023 survey data as 2023. - - The only exception is the data for the 2012 report, which uses survey data from 2005-2011, we show this data as the final year of the survey data - 2011. - - The number of people and countries surveyed varies year to year, but typically more than 100,000 people in 130 countries participate in the Gallup World Poll each year. - - The rankings are based entirely on the survey scores, using the Gallup weights to make the estimates representative. - - The data is the compilation of all previous World Happiness Reports, which can be found at https://worldhappiness.report/archive/. + - The Cantril ladder asks respondents to think of a ladder, with the best possible life for them being a 10 and the worst possible life being a 0. They are then asked to rate their own current lives on that 0 to 10 scale. + - The rankings are calculated by the source based on nationally representative samples for the three years prior to the year of the report, so that data for the 2024 report will draw from survey data from 2021-2023. We show the data for final year of the three-year survey period, i.e. we show the 2021-2023 survey data as 2023. + - The only exception is the data for the 2012 report, which uses survey data from 2005-2011, we show this data as the final year of the survey data - 2011. + - The number of people and countries surveyed varies year to year, but typically more than 100,000 people in 130 countries participate in the Gallup World Poll each year. + - The rankings are based entirely on the survey scores, using the Gallup weights to make the estimates representative. + - The data is the compilation of all previous World Happiness Reports, which can be found at https://worldhappiness.report/archive/. @@ -51,28 +57,14 @@ tables: variables: happiness_below_30: title: Life satisfaction of people below 30 - unit: "" - short_unit: "" - description_short: "" happiness_30_to_44: title: Life satisfaction of people aged 30-44 - unit: "" - short_unit: "" - description_short: "." happiness_45_to_59: title: Life satisfaction of people aged 45-59 - unit: "" - short_unit: "" - description_short: "." happiness_60_and_above: title: Life satisfaction of people aged 60 and above - unit: "" - short_unit: "" - description_short: "." happiness_all_ages: title: Cantril ladder score - unit: "" - short_unit: "" description_short: Average of survey responses to the 'Cantril Ladder' question in the Gallup World Poll. The survey question asks respondents to think of a ladder, with the best possible life for them being a 10, and the worst possible life being a 0. presentation: attribution: World Happiness Report (2012-2024) From ee4eeeafa707a8b7aeafbaad00319bedf28c6002 Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Wed, 26 Jun 2024 14:08:56 +0100 Subject: [PATCH 11/17] remove regional aggregates where data is missing & update metadata --- .../garden/happiness/2024-06-09/happiness.py | 55 +++++++++++++------ .../happiness/2024-06-09/happiness.meta.yml | 4 +- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/etl/steps/data/garden/happiness/2024-06-09/happiness.py b/etl/steps/data/garden/happiness/2024-06-09/happiness.py index 37e919ae2b3..3ab103e3229 100644 --- a/etl/steps/data/garden/happiness/2024-06-09/happiness.py +++ b/etl/steps/data/garden/happiness/2024-06-09/happiness.py @@ -9,25 +9,22 @@ paths = PathFinder(__file__) -REGIONS = {reg: reg_dict for reg, reg_dict in geo.REGIONS.items() if reg != "European Union (27)"} -REGIONS.update({"World": {}}) +ALL_REGIONS = {reg: reg_dict for reg, reg_dict in geo.REGIONS.items() if reg != "European Union (27)"} +ALL_REGIONS.update({"World": {}}) def run(dest_dir: str) -> None: # # Load inputs. # - # Load meadow dataset, previous years and population data. + # Load datasets: meadow dataset (latest happiness report), previous years, happiness by ages + # for regional aggregates: population dataset, regions dataset, income groups dataset ds_meadow = paths.load_dataset("happiness", version="2024-06-09") ds_prev_years = paths.load_dataset("happiness", channel="garden", version="2023-03-20") - ds_population = paths.load_dataset("population", channel="garden") - ds_happiness_ages = paths.load_dataset("happiness_ages") - # Load regions dataset. + ds_population = paths.load_dataset("population", channel="garden") ds_regions = paths.load_dataset("regions") - - # Load income groups dataset. ds_income_groups = paths.load_dataset("income_groups") # Read table datasets. @@ -39,14 +36,15 @@ def run(dest_dir: str) -> None: cols_overlap = ["country", "cantril_ladder_score", "year"] tb = pr.concat([tb_this_year[cols_overlap], tb_prev_years], ignore_index=True) + # Read table including happiness by age group tb_ages = ds_happiness_ages["happiness_ages"].reset_index() # Harmonize country names tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path) tb_ages = geo.harmonize_countries( - df=tb_ages, countries_file=paths.country_mapping_path, warn_on_missing_countries=False + df=tb_ages, countries_file=paths.country_mapping_path, warn_on_unused_countries=False ) - + # Process happiness by age group data # drop unneeded columns from age table tb_ages = tb_ages.drop( columns=[ @@ -59,35 +57,56 @@ def run(dest_dir: str) -> None: "worry_count", ] ) - # remove leading "Age " from age_group tb_ages["age_group"] = tb_ages["age_group"].str.replace("Age ", "") - # Concatenate table happiness data with table happiness data by age group + # standardize happiness by age group and happiness data tb["age_group"] = "all ages" tb["happiness_score"] = tb["cantril_ladder_score"] tb = tb.drop(columns=["cantril_ladder_score"]) - tb = pr.concat([tb, tb_ages], ignore_index=True) - - # Process data (add population weighted averages for continents & income groups) + # + # Add population weighted averages for continents & income groups) + # # save data of Northern Cyrpus and Somaliland to concat later (they do not have population in population dataset) countries_no_pop_msk = tb["country"].isin(["Northern Cyprus", "Somaliland"]) tb_countries_wo_population = tb[countries_no_pop_msk] tb = tb[~countries_no_pop_msk] - # add population to table + # add population to tables tb = geo.add_population_to_table(tb, ds_population) + tb_ages = geo.add_population_to_table(tb_ages, ds_population) # calculate population weighted averages by multiplying the population with the cantril ladder score # and then summing and dividing by the total population tb["happiness_times_pop"] = tb["happiness_score"] * tb["population"] + tb_ages["happiness_times_pop"] = tb_ages["happiness_score"] * tb_ages["population"] + + # set population to NaN where happiness_score is NaN + tb["population"] = tb["population"].where(~tb["happiness_score"].isna(), other=None) + tb_ages["population"] = tb_ages["population"].where(~tb_ages["happiness_score"].isna(), other=None) aggr_score = {"happiness_times_pop": "sum", "population": "sum"} tb = geo.add_regions_to_table( tb, aggregations=aggr_score, - regions=REGIONS, + regions=ALL_REGIONS, + ds_regions=ds_regions, + ds_income_groups=ds_income_groups, + index_columns=["country", "year", "age_group"], + min_num_values_per_year=1, + ) + + # For happiness by age group, remove all regions where less than 50% of the population is covered + # Manual check: Africa and Low income regions are not sufficiently covered + regions_for_age_groups = { + reg: reg_dict for reg, reg_dict in ALL_REGIONS.items() if reg not in ["Africa", "Low-income countries"] + } + + tb_ages = geo.add_regions_to_table( + tb_ages, + aggregations=aggr_score, + regions=regions_for_age_groups, ds_regions=ds_regions, ds_income_groups=ds_income_groups, index_columns=["country", "year", "age_group"], @@ -95,6 +114,8 @@ def run(dest_dir: str) -> None: ) # Divide the sum of the cantril ladder score times population by the total population + # concatenate the two tables + tb = pr.concat([tb, tb_ages], ignore_index=True) tb["happiness_score"] = tb["happiness_times_pop"] / tb["population"] # drop unneeded columns diff --git a/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml b/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml index 5db5370e759..811aecc043f 100644 --- a/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml +++ b/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml @@ -34,7 +34,9 @@ definitions: "Self-reported life satisfaction is the answer to the question: 'Imagine a ladder with the best possible life being a 10, and the worst possible life being a 0. Which step on that ladder would you say you stand on right now?'" - description_processing: Average of regions is calculated by taking a population-weighted average over all countries within that region. + description_processing: + Average of regions is calculated by taking a population-weighted average over all countries within that region. + Since data per age group is not available for all countries, regional aggregates can sometimes differ. description_key: - The Cantril ladder asks respondents to think of a ladder, with the best possible life for them being a 10 and the worst possible life being a 0. They are then asked to rate their own current lives on that 0 to 10 scale. - The rankings are calculated by the source based on nationally representative samples for the three years prior to the year of the report, so that data for the 2024 report will draw from survey data from 2021-2023. We show the data for final year of the three-year survey period, i.e. we show the 2021-2023 survey data as 2023. From b1c4805eef82e008c419da1cbf94ae126b548922 Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Wed, 26 Jun 2024 14:53:14 +0100 Subject: [PATCH 12/17] trigger build From 2594511815a9935cfd2ca9532f1ed8977b538c6e Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Fri, 28 Jun 2024 11:14:41 +0100 Subject: [PATCH 13/17] PR review comments (fix pivoting and metadata) --- .../happiness/2024-06-09/happiness.meta.yml | 89 +++++++++++++------ .../garden/happiness/2024-06-09/happiness.py | 31 +++++-- .../grapher/happiness/2024-06-09/happiness.py | 49 +--------- 3 files changed, 86 insertions(+), 83 deletions(-) diff --git a/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml b/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml index 91499d7179f..66a10a08554 100644 --- a/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml +++ b/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml @@ -1,34 +1,71 @@ +# NOTE: To learn more about the fields, hover over their names. definitions: common: - processing_level: major presentation: topic_tags: - Happiness & Life Satisfaction + attribution_short: WHR + display: + numDecimalPlaces: 2 + processing_level: major + unit: "" + short_unit: "" + origins: + # Data product / Snapshot + - title: World Happiness Report + description: |- + The World Happiness Report is a partnership of Gallup, the Oxford Wellbeing Research Centre, the UN Sustainable Development Solutions Network, and the WHR’s Editorial Board. + It reviews the state of happiness in the world today and shows how the science of happiness explains personal and national variations in happiness. + date_published: "2024-03-08" + version_producer: 2024 + # Citation + producer: Wellbeing Research Centre + citation_full: |- + Helliwell, J. F., Layard, R., Sachs, J. D., De Neve, J.-E., Aknin, L. B., & Wang, S. (Eds.). (2024). World Happiness Report 2024. University of Oxford: Wellbeing Research Centre. + attribution_short: WHR + # Files + url_main: https://worldhappiness.report/ed/2024/ + date_accessed: 2024-06-20 + # License + license: + name: "" + url: https://worldhappiness.report/ed/2024/ + description_short: + "Self-reported life satisfaction is the answer to the question: + 'Imagine a ladder with the best possible life being a 10, and the worst possible life being a 0. + Which step on that ladder would you say you stand on right now?'" + description_processing: + Average of regions is calculated by taking a population-weighted average over all countries within that region. + Since data per age group is not available for all countries, regional aggregates can sometimes differ. + description_key: + - The Cantril ladder asks respondents to think of a ladder, with the best possible life for them being a 10 and the worst possible life being a 0. They are then asked to rate their own current lives on that 0 to 10 scale. + - The rankings are calculated by the source based on nationally representative samples for the three years prior to the year of the report, so that data for the 2024 report will draw from survey data from 2021-2023. We show the data for final year of the three-year survey period, i.e. we show the 2021-2023 survey data as 2023. + - The only exception is the data for the 2012 report, which uses survey data from 2005-2011, we show this data as the final year of the survey data - 2011. + - The number of people and countries surveyed varies year to year, but typically more than 100,000 people in 130 countries participate in the Gallup World Poll each year. + - The rankings are based entirely on the survey scores, using the Gallup weights to make the estimates representative. + - The data is the compilation of all previous World Happiness Reports, which can be found at https://worldhappiness.report/archive/. + metric: + title: + <% if age_group == "below 30"%> + Life satisfaction of people below 30 + <% elsif age_group != "all ages"%> + Life satisfaction of people aged <> + <% elsif age_group == "all ages"%> + Cantril ladder score + <% endif %> + description_short: + <% if age_group == "all ages"%> + Average of survey responses to the 'Cantril Ladder' question in the Gallup World Poll. The survey question asks respondents to think of a ladder, with the best possible life for them being a 10, and the worst possible life being a 0. + <% endif %> + presentation: + attribution: + <% if age_group == "all ages"%> + World Happiness Report (2012-2024) + title_public: + <% if age_group == "all ages"%> + Self-reported life satisfaction -dataset: - update_period_days: 365 - -tables: - happiness: - variables: - happiness_score: - title: Cantril ladder score - unit: "" - short_unit: "" - description_short: Average of survey responses to the 'Cantril Ladder' question in the Gallup World Poll. The survey question asks respondents to think of a ladder, with the best possible life for them being a 10, and the worst possible life being a 0. - description_processing: Average of regions is calculated by taking a population-weighted average over all countries within that region. - description_key: - - The Cantril ladder asks respondents to think of a ladder, with the best possible life for them being a 10 and the worst possible life being a 0. They are then asked to rate their own current lives on that 0 to 10 scale. - - The rankings are calculated by the source based on nationally representative samples for the three years prior to the year of the report, so that data for the 2024 report will draw from survey data from 2021-2023. We show the data for final year of the three-year survey period, i.e. we show the 2021-2023 survey data as 2023. - - The only exception is the data for the 2012 report, which uses survey data from 2005-2011, we show this data as the final year of the survey data - 2011. - - The number of people and countries surveyed varies year to year, but typically more than 100,000 people in 130 countries participate in the Gallup World Poll each year. - - The rankings are based entirely on the survey scores, using the Gallup weights to make the estimates representative. - - The data is the compilation of all previous World Happiness Reports, which can be found at https://worldhappiness.report/archive/. - presentation: - attribution: World Happiness Report (2012-2024) - attribution_short: WHR - title_public: Self-reported life satisfaction - display: - numDecimalPlaces: 2 \ No newline at end of file +dataset: + update_period_days: 365 \ No newline at end of file diff --git a/etl/steps/data/garden/happiness/2024-06-09/happiness.py b/etl/steps/data/garden/happiness/2024-06-09/happiness.py index 3ab103e3229..87dab77e96d 100644 --- a/etl/steps/data/garden/happiness/2024-06-09/happiness.py +++ b/etl/steps/data/garden/happiness/2024-06-09/happiness.py @@ -1,6 +1,6 @@ """Load a meadow dataset and create a garden dataset.""" - import owid.catalog.processing as pr +from owid.catalog import Dataset, Table from etl.data_helpers import geo from etl.helpers import PathFinder, create_dataset @@ -13,6 +13,23 @@ ALL_REGIONS.update({"World": {}}) +def remove_regions_below_population_threshold( + tb: Table, regions: dict, ds_population: Dataset, threshold: float +) -> Table: + """ + Check the share of population covered by the regions in the table. + """ + msk = tb["country"].isin(regions.keys()) + tb_region = tb[msk] + tb_no_regions = tb[~msk] + tb_region = geo.add_population_to_table(tb_region, ds_population, population_col="total_population") + tb_region["share_population"] = tb_region["population"] / tb_region["total_population"] + tb_region = tb_region[tb_region["share_population"] >= threshold] + tb_region = tb_region.drop(columns=["total_population", "share_population"]) + tb = pr.concat([tb_region, tb_no_regions]) + return tb + + def run(dest_dir: str) -> None: # # Load inputs. @@ -83,7 +100,6 @@ def run(dest_dir: str) -> None: tb_ages["happiness_times_pop"] = tb_ages["happiness_score"] * tb_ages["population"] # set population to NaN where happiness_score is NaN - tb["population"] = tb["population"].where(~tb["happiness_score"].isna(), other=None) tb_ages["population"] = tb_ages["population"].where(~tb_ages["happiness_score"].isna(), other=None) aggr_score = {"happiness_times_pop": "sum", "population": "sum"} @@ -97,22 +113,19 @@ def run(dest_dir: str) -> None: min_num_values_per_year=1, ) - # For happiness by age group, remove all regions where less than 50% of the population is covered - # Manual check: Africa and Low income regions are not sufficiently covered - regions_for_age_groups = { - reg: reg_dict for reg, reg_dict in ALL_REGIONS.items() if reg not in ["Africa", "Low-income countries"] - } - tb_ages = geo.add_regions_to_table( tb_ages, aggregations=aggr_score, - regions=regions_for_age_groups, + regions=ALL_REGIONS, ds_regions=ds_regions, ds_income_groups=ds_income_groups, index_columns=["country", "year", "age_group"], min_num_values_per_year=1, ) + # For happiness by age group, remove all regions where less than 50% of the population is covered + tb_ages = remove_regions_below_population_threshold(tb_ages, ALL_REGIONS, ds_population, threshold=0.5) + # Divide the sum of the cantril ladder score times population by the total population # concatenate the two tables tb = pr.concat([tb, tb_ages], ignore_index=True) diff --git a/etl/steps/data/grapher/happiness/2024-06-09/happiness.py b/etl/steps/data/grapher/happiness/2024-06-09/happiness.py index 289205377f9..b26d0a61be2 100644 --- a/etl/steps/data/grapher/happiness/2024-06-09/happiness.py +++ b/etl/steps/data/grapher/happiness/2024-06-09/happiness.py @@ -1,51 +1,9 @@ """Load a garden dataset and create a grapher dataset.""" -import pandas as pd -from owid.catalog import Table - from etl.helpers import PathFinder, create_dataset # Get paths and naming conventions for current step. paths = PathFinder(__file__) -AGES_COLUMNS = [ - "happiness_below_30", - "happiness_30_to_44", - "happiness_45_to_59", - "happiness_60_and_above", - "happiness_all_ages", -] - - -def pivot_age_groups(tb: Table): - new_tb_rows = [] - for cty in tb["country"].unique(): - cty_tb = tb[tb["country"] == cty] - for year in cty_tb["year"].unique(): - new_row_dict = {"country": cty, "year": year} - row_tb = cty_tb[cty_tb["year"] == year] - ages = ["below 30", "30-44", "45-59", "60 and above", "all ages"] - for idx in range(len(ages)): - age_entry = ages[idx] - age_column = AGES_COLUMNS[idx] - age_row = row_tb[row_tb["age_group"] == age_entry] - if len(age_row) == 0: - new_row_dict[age_column] = pd.NA - else: - new_row_dict[age_column] = age_row["happiness_score"].values[0] - new_tb_rows.append(new_row_dict) - tb_pivot = Table( - pd.DataFrame( - new_tb_rows, - columns=[ - "country", - "year", - ] - + AGES_COLUMNS, - ) - ) - tb_pivot = tb_pivot.copy_metadata(tb) - return tb_pivot - def run(dest_dir: str) -> None: # @@ -60,12 +18,7 @@ def run(dest_dir: str) -> None: # pivot table tb = tb.drop(columns=["population"]) - tb = pivot_age_groups(tb) - - for age_col in AGES_COLUMNS: - tb[age_col] = tb[age_col].astype("Float64") - - tb = tb.format(["country", "year"]) + tb = tb.format(["country", "year", "age_group"]) # Save outputs. # From 20fccbfe9a77862dba5b81f3bbf40d471d437b95 Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Fri, 28 Jun 2024 12:53:52 +0100 Subject: [PATCH 14/17] edit metadata --- .../happiness/2024-06-09/happiness.meta.yml | 44 ++++++++--- .../garden/happiness/2024-06-09/happiness.py | 3 + .../happiness/2024-06-09/happiness.meta.yml | 76 ------------------- .../grapher/happiness/2024-06-09/happiness.py | 7 +- 4 files changed, 38 insertions(+), 92 deletions(-) delete mode 100644 etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml diff --git a/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml b/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml index 66a10a08554..cf588497513 100644 --- a/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml +++ b/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml @@ -8,8 +8,6 @@ definitions: display: numDecimalPlaces: 2 processing_level: major - unit: "" - short_unit: "" origins: # Data product / Snapshot - title: World Happiness Report @@ -46,26 +44,52 @@ definitions: - The data is the compilation of all previous World Happiness Reports, which can be found at https://worldhappiness.report/archive/. metric: title: - <% if age_group == "below 30"%> + <% if age_group == "below 30" %> Life satisfaction of people below 30 - <% elsif age_group != "all ages"%> - Life satisfaction of people aged <> - <% elsif age_group == "all ages"%> + <% elif age_group == "30-44" %> + Life satisfaction of people aged 30-44 + <% elif age_group == "45-59" %> + Life satisfaction of people aged 45-59 + <% elif age_group == "60 and above" %> + Life satisfaction of people aged 60 and above + <% elif age_group == "all ages" %> Cantril ladder score <% endif %> + unit: "" + short_unit: "" description_short: - <% if age_group == "all ages"%> + <% if age_group == "all ages" %> Average of survey responses to the 'Cantril Ladder' question in the Gallup World Poll. The survey question asks respondents to think of a ladder, with the best possible life for them being a 10, and the worst possible life being a 0. <% endif %> presentation: attribution: - <% if age_group == "all ages"%> + <% if age_group == "all ages" %> World Happiness Report (2012-2024) + <% endif %> title_public: - <% if age_group == "all ages"%> + <% if age_group == "all ages" %> Self-reported life satisfaction + <% endif %> + dataset: - update_period_days: 365 \ No newline at end of file + update_period_days: 365 + + +tables: + happiness: + variables: + happiness_score: + title: |- + {definitions.metric.title} + unit: |- + {definitions.metric.unit} + description_short: |- + {definitions.metric.description_short} + presentation: + attribution: |- + {definitions.metric.presentation.attribution} + title_public: |- + {definitions.metric.presentation.title_public} diff --git a/etl/steps/data/garden/happiness/2024-06-09/happiness.py b/etl/steps/data/garden/happiness/2024-06-09/happiness.py index 87dab77e96d..7ae578d6ea4 100644 --- a/etl/steps/data/garden/happiness/2024-06-09/happiness.py +++ b/etl/steps/data/garden/happiness/2024-06-09/happiness.py @@ -137,6 +137,9 @@ def run(dest_dir: str) -> None: # add back Northern Cyprus and Somaliland tb = pr.concat([tb, tb_countries_wo_population], ignore_index=True) + # drop population + tb = tb.drop(columns=["population"]) + tb = tb.format(["country", "year", "age_group"]) # Save outputs. diff --git a/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml b/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml deleted file mode 100644 index 811aecc043f..00000000000 --- a/etl/steps/data/grapher/happiness/2024-06-09/happiness.meta.yml +++ /dev/null @@ -1,76 +0,0 @@ -# NOTE: To learn more about the fields, hover over their names. -definitions: - common: - presentation: - topic_tags: - - Happiness & Life Satisfaction - attribution_short: WHR - display: - numDecimalPlaces: 2 - processing_level: major - unit: "" - short_unit: "" - origins: - # Data product / Snapshot - - title: World Happiness Report - description: |- - The World Happiness Report is a partnership of Gallup, the Oxford Wellbeing Research Centre, the UN Sustainable Development Solutions Network, and the WHR’s Editorial Board. - It reviews the state of happiness in the world today and shows how the science of happiness explains personal and national variations in happiness. - date_published: "2024-03-08" - version_producer: 2024 - # Citation - producer: Wellbeing Research Centre - citation_full: |- - Helliwell, J. F., Layard, R., Sachs, J. D., De Neve, J.-E., Aknin, L. B., & Wang, S. (Eds.). (2024). World Happiness Report 2024. University of Oxford: Wellbeing Research Centre. - attribution_short: WHR - # Files - url_main: https://worldhappiness.report/ed/2024/ - date_accessed: 2024-06-20 - # License - license: - name: "" - url: https://worldhappiness.report/ed/2024/ - description_short: - "Self-reported life satisfaction is the answer to the question: - 'Imagine a ladder with the best possible life being a 10, and the worst possible life being a 0. - Which step on that ladder would you say you stand on right now?'" - description_processing: - Average of regions is calculated by taking a population-weighted average over all countries within that region. - Since data per age group is not available for all countries, regional aggregates can sometimes differ. - description_key: - - The Cantril ladder asks respondents to think of a ladder, with the best possible life for them being a 10 and the worst possible life being a 0. They are then asked to rate their own current lives on that 0 to 10 scale. - - The rankings are calculated by the source based on nationally representative samples for the three years prior to the year of the report, so that data for the 2024 report will draw from survey data from 2021-2023. We show the data for final year of the three-year survey period, i.e. we show the 2021-2023 survey data as 2023. - - The only exception is the data for the 2012 report, which uses survey data from 2005-2011, we show this data as the final year of the survey data - 2011. - - The number of people and countries surveyed varies year to year, but typically more than 100,000 people in 130 countries participate in the Gallup World Poll each year. - - The rankings are based entirely on the survey scores, using the Gallup weights to make the estimates representative. - - The data is the compilation of all previous World Happiness Reports, which can be found at https://worldhappiness.report/archive/. - - - - -# Learn more about the available fields: -# http://docs.owid.io/projects/etl/architecture/metadata/reference/ -dataset: - update_period_days: 365 - - -tables: - happiness: - variables: - happiness_below_30: - title: Life satisfaction of people below 30 - happiness_30_to_44: - title: Life satisfaction of people aged 30-44 - happiness_45_to_59: - title: Life satisfaction of people aged 45-59 - happiness_60_and_above: - title: Life satisfaction of people aged 60 and above - happiness_all_ages: - title: Cantril ladder score - description_short: Average of survey responses to the 'Cantril Ladder' question in the Gallup World Poll. The survey question asks respondents to think of a ladder, with the best possible life for them being a 10, and the worst possible life being a 0. - presentation: - attribution: World Happiness Report (2012-2024) - attribution_short: WHR - title_public: Self-reported life satisfaction - display: - numDecimalPlaces: 2 diff --git a/etl/steps/data/grapher/happiness/2024-06-09/happiness.py b/etl/steps/data/grapher/happiness/2024-06-09/happiness.py index b26d0a61be2..95d875888b0 100644 --- a/etl/steps/data/grapher/happiness/2024-06-09/happiness.py +++ b/etl/steps/data/grapher/happiness/2024-06-09/happiness.py @@ -13,12 +13,7 @@ def run(dest_dir: str) -> None: ds_garden = paths.load_dataset("happiness") # Read table from garden dataset. - tb = ds_garden["happiness"].reset_index() - - # pivot table - tb = tb.drop(columns=["population"]) - - tb = tb.format(["country", "year", "age_group"]) + tb = ds_garden["happiness"] # Save outputs. # From 07048e534e11d4f520e0ef592293ac5cf7ebfbc8 Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Fri, 28 Jun 2024 14:15:11 +0100 Subject: [PATCH 15/17] new staging server From 698e6c6c96db3e8da671969045eddd95d1b6f41b Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Mon, 1 Jul 2024 12:04:32 +0100 Subject: [PATCH 16/17] fix charts and metadata --- .../data/garden/happiness/2024-06-09/happiness.meta.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml b/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml index cf588497513..e387616b2f3 100644 --- a/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml +++ b/etl/steps/data/garden/happiness/2024-06-09/happiness.meta.yml @@ -69,6 +69,14 @@ definitions: title_public: <% if age_group == "all ages" %> Self-reported life satisfaction + <% elif age_group == "below 30" %> + Life satisfaction of people below 30 + <% elif age_group == "30-44" %> + Life satisfaction of people aged 30-44 + <% elif age_group == "45-59" %> + Life satisfaction of people aged 45-59 + <% elif age_group == "60 and above" %> + Life satisfaction of people aged 60 and above <% endif %> From d0453d141667cb9c77adda4ff58503cd69a8913b Mon Sep 17 00:00:00 2001 From: Tuna Acisu Date: Mon, 1 Jul 2024 12:08:31 +0100 Subject: [PATCH 17/17] trigger build