From 6fc0ea17f222673669926391660089ce9381da5b Mon Sep 17 00:00:00 2001
From: owidbot
Date: Thu, 30 Jan 2025 14:43:24 +0000
Subject: [PATCH] fasttrack: fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.csv

---
 dag/fasttrack.yml | 2 +
 ...donor_costs_dac_countries__sheet1.meta.yml | 27 +++++++++++++
 ...23_in_donor_costs_dac_countries__sheet1.py | 40 +++++++++++++++++++
 ..._donor_costs_dac_countries__sheet1.csv.dvc | 16 ++++++++
 4 files changed, 85 insertions(+)
 create mode 100644 etl/steps/data/grapher/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.meta.yml
 create mode 100644 etl/steps/data/grapher/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.py
 create mode 100644 snapshots/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.csv.dvc

diff --git a/dag/fasttrack.yml b/dag/fasttrack.yml
index c1d24e6a5fb..82182b9c0c4 100644
--- a/dag/fasttrack.yml
+++ b/dag/fasttrack.yml
@@ -228,3 +228,5 @@ steps:
     - snapshot://fasttrack/latest/cdc_measles.csv
   data://grapher/fasttrack/latest/try_fast_track_in_donor_costs:
     - snapshot://fasttrack/latest/try_fast_track_in_donor_costs.csv
+  data://grapher/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1:
+    - snapshot://fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.csv
diff --git a/etl/steps/data/grapher/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.meta.yml b/etl/steps/data/grapher/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.meta.yml
new file mode 100644
index 00000000000..2c57324f32d
--- /dev/null
+++ b/etl/steps/data/grapher/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.meta.yml
@@ -0,0 +1,27 @@
+dataset:
+  title: DRAFT 1 – 2018-2023 In-donor costs DAC countries - Sheet1
+  description: ''
+tables:
+  _1__2018_2023_in_donor_costs_dac_countries__sheet1:
+    variables:
+      debt_relief:
+        title: Debt Relief
+        unit: ''
+      students:
+        title: Students
+        unit: ''
+      refugees:
+        title: Refugees
+        unit: ''
+      development_awareness:
+        title: Development Awareness
+        unit: ''
+      administrative:
+        title: Administrative
+        unit: ''
+      research:
+        title: Research
+        unit: ''
+      total:
+        title: Total
+        unit: ''
diff --git a/etl/steps/data/grapher/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.py b/etl/steps/data/grapher/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.py
new file mode 100644
index 00000000000..7dbbe733c24
--- /dev/null
+++ b/etl/steps/data/grapher/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.py
@@ -0,0 +1,40 @@
+import pandas as pd
+
+from etl.helpers import PathFinder, create_dataset, get_metadata_path
+from etl.snapshot import Snapshot
+
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    # load snapshot
+    snap = Snapshot("fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.csv")
+
+    # load data
+    tb = snap.read_csv()
+
+    # add dimensions with dim_ prefix
+    dims = [c for c in tb.columns if c.startswith("dim_")]
+    dims_without_prefix = [c[4:] for c in dims]
+
+    if dims:
+        tb = tb.rename(columns={d: dw for d, dw in zip(dims, dims_without_prefix)})
+
+    if uses_dates(tb["year"]):
+        tb = tb.rename(columns={"year": "date"}).format(["country", "date"] + dims_without_prefix)
+    else:
+        tb = tb.format(["country", "year"] + dims_without_prefix)
+
+    # add table, update metadata from *.meta.yml and save
+    ds = create_dataset(dest_dir, tables=[tb], default_metadata=snap.metadata)
+
+    # override metadata if necessary
+    meta_path = get_metadata_path(dest_dir).with_suffix(".override.yml")
+    if meta_path.exists():
+        ds.update_metadata(meta_path)
+
+    ds.save()
+
+
+def uses_dates(s: pd.Series) -> bool:
+    return pd.to_datetime(s, errors="coerce", format="%Y-%m-%d").notnull().all()
diff --git a/snapshots/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.csv.dvc b/snapshots/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.csv.dvc
new file mode 100644
index 00000000000..6b619e8cca9
--- /dev/null
+++ b/snapshots/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.csv.dvc
@@ -0,0 +1,16 @@
+meta:
+  origin:
+    producer: Unknown
+    title: Unknown
+    citation_full: Unknown
+    version_producer: Local CSV
+    url_main: https://unknown.com
+    url_download: 1 – 2018-2023 In-donor costs DAC countries - Sheet1.csv
+    date_accessed: '2025-01-30'
+    date_published: '2025-01-30'
+  name: DRAFT 1 – 2018-2023 In-donor costs DAC countries - Sheet1
+  description: ''
+outs:
+  - md5: 05de6b53e56e8079da3f1d8a17de680d
+    size: 442
+    path: _1__2018_2023_in_donor_costs_dac_countries__sheet1.csv