Skip to content

Commit

Permalink
fasttrack: fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countrie…
Browse files Browse the repository at this point in the history
…s__sheet1.csv
  • Loading branch information
owidbot committed Jan 30, 2025
1 parent b6d23e0 commit 6fc0ea1
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 0 deletions.
2 changes: 2 additions & 0 deletions dag/fasttrack.yml
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,5 @@ steps:
- snapshot://fasttrack/latest/cdc_measles.csv
data://grapher/fasttrack/latest/try_fast_track_in_donor_costs:
- snapshot://fasttrack/latest/try_fast_track_in_donor_costs.csv
data://grapher/fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1:
- snapshot://fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
dataset:
title: DRAFT 1 – 2018-2023 In-donor costs DAC countries - Sheet1
description: ''
tables:
_1__2018_2023_in_donor_costs_dac_countries__sheet1:
variables:
debt_relief:
title: Debt Relief
unit: ''
students:
title: Students
unit: ''
refugees:
title: Refugees
unit: ''
development_awareness:
title: Development Awareness
unit: ''
administrative:
title: Administrative
unit: ''
research:
title: Research
unit: ''
total:
title: Total
unit: ''
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import pandas as pd

from etl.helpers import PathFinder, create_dataset, get_metadata_path
from etl.snapshot import Snapshot

# Path helper built from this step file's location.
# NOTE(review): nothing in the visible code reads `paths` — confirm whether it is still needed.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Build the grapher dataset for this fast-track snapshot and save it.

    Reads the snapshot CSV, strips the ``dim_`` prefix from any dimension
    columns, formats the table with ``country``, the time column (``date``
    when the ``year`` column holds ``YYYY-MM-DD`` values, ``year`` otherwise)
    and the dimension columns as keys, then creates the dataset in
    ``dest_dir``. If an ``.override.yml`` metadata file exists next to the
    step's metadata file, it is applied before saving.
    """
    snapshot = Snapshot("fasttrack/latest/_1__2018_2023_in_donor_costs_dac_countries__sheet1.csv")
    table = snapshot.read_csv()

    # Map every "dim_<name>" column to "<name>"; dict order follows column order.
    prefix = "dim_"
    dim_renames = {col: col[len(prefix):] for col in table.columns if col.startswith(prefix)}
    dim_names = list(dim_renames.values())
    if dim_renames:
        table = table.rename(columns=dim_renames)

    # The "year" column may actually carry full dates; rename it if so.
    if uses_dates(table["year"]):
        table = table.rename(columns={"year": "date"})
        time_column = "date"
    else:
        time_column = "year"
    table = table.format(["country", time_column] + dim_names)

    # Create the dataset, falling back to the snapshot's metadata by default.
    dataset = create_dataset(dest_dir, tables=[table], default_metadata=snapshot.metadata)

    # Apply the optional override metadata file when present.
    override_path = get_metadata_path(dest_dir).with_suffix(".override.yml")
    if override_path.exists():
        dataset.update_metadata(override_path)

    dataset.save()


def uses_dates(s: pd.Series) -> bool:
    """Return True when every value in *s* parses as a ``YYYY-MM-DD`` date.

    Values that do not match the format are coerced to ``NaT`` rather than
    raising, so a single unparseable entry makes the result False. An empty
    series yields True, because ``all()`` over no elements is vacuously true.

    The result is converted to a builtin ``bool`` so it matches the declared
    return type (``Series.all()`` returns ``numpy.bool_``).
    """
    parsed = pd.to_datetime(s, errors="coerce", format="%Y-%m-%d")
    return bool(parsed.notnull().all())
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
meta:
origin:
producer: Unknown
title: Unknown
citation_full: Unknown
version_producer: Local CSV
url_main: https://unknown.com
url_download: 1 – 2018-2023 In-donor costs DAC countries - Sheet1.csv
date_accessed: '2025-01-30'
date_published: '2025-01-30'
name: DRAFT 1 – 2018-2023 In-donor costs DAC countries - Sheet1
description: ''
outs:
- md5: 05de6b53e56e8079da3f1d8a17de680d
size: 442
path: _1__2018_2023_in_donor_costs_dac_countries__sheet1.csv

0 comments on commit 6fc0ea1

Please sign in to comment.