diff --git a/dag/archive/main.yml b/dag/archive/main.yml index 09449e35d84..bf53b69a855 100644 --- a/dag/archive/main.yml +++ b/dag/archive/main.yml @@ -263,6 +263,14 @@ steps: data://grapher/countries/2023-10-01/gleditsch: - data://garden/countries/2023-09-25/gleditsch + # International Monetary Fund, World Economic Outlook + data://meadow/imf/2024-05-02/world_economic_outlook: + - snapshot://imf/2024-05-02/world_economic_outlook.xls + data://garden/imf/2024-05-02/world_economic_outlook: + - data://meadow/imf/2024-05-02/world_economic_outlook + data://grapher/imf/2024-05-02/world_economic_outlook: + - data://garden/imf/2024-05-02/world_economic_outlook + include: # Include all active steps plus all archive steps. - dag/main.yml diff --git a/dag/main.yml b/dag/main.yml index 9b772539002..7d4f3ee84bb 100644 --- a/dag/main.yml +++ b/dag/main.yml @@ -280,14 +280,6 @@ steps: data://grapher/eth/2023-03-15/ethnic_power_relations: - data://garden/eth/2023-03-15/ethnic_power_relations - # International Monetary Fund, World Economic Outlook - data://meadow/imf/2024-05-02/world_economic_outlook: - - snapshot://imf/2024-05-02/world_economic_outlook.xls - data://garden/imf/2024-05-02/world_economic_outlook: - - data://meadow/imf/2024-05-02/world_economic_outlook - data://grapher/imf/2024-05-02/world_economic_outlook: - - data://garden/imf/2024-05-02/world_economic_outlook - # Patents & journal articles (World Bank, United Nations) data://garden/research_development/2024-05-20/patents_articles: - data://garden/worldbank_wdi/2024-05-20/wdi @@ -878,6 +870,14 @@ steps: data://grapher/migration/2024-11-18/migration_between_regions: - data://garden/migration/2024-11-18/migration_between_regions + # IMF World Economic Outlook + data://meadow/imf/2024-11-25/world_economic_outlook: + - snapshot://imf/2024-11-25/world_economic_outlook.xls + data://garden/imf/2024-11-25/world_economic_outlook: + - data://meadow/imf/2024-11-25/world_economic_outlook + 
data://grapher/imf/2024-11-25/world_economic_outlook: + - data://garden/imf/2024-11-25/world_economic_outlook + include: - dag/open_numbers.yml - dag/faostat.yml diff --git a/etl/steps/data/garden/imf/2024-11-25/world_economic_outlook.countries.json b/etl/steps/data/garden/imf/2024-11-25/world_economic_outlook.countries.json new file mode 100644 index 00000000000..7b3c8a6d039 --- /dev/null +++ b/etl/steps/data/garden/imf/2024-11-25/world_economic_outlook.countries.json @@ -0,0 +1,198 @@ +{ + "Afghanistan": "Afghanistan", + "Albania": "Albania", + "Algeria": "Algeria", + "Andorra": "Andorra", + "Angola": "Angola", + "Antigua and Barbuda": "Antigua and Barbuda", + "Argentina": "Argentina", + "Armenia": "Armenia", + "Aruba": "Aruba", + "Australia": "Australia", + "Austria": "Austria", + "Azerbaijan": "Azerbaijan", + "Bahrain": "Bahrain", + "Bangladesh": "Bangladesh", + "Barbados": "Barbados", + "Belarus": "Belarus", + "Belgium": "Belgium", + "Belize": "Belize", + "Benin": "Benin", + "Bhutan": "Bhutan", + "Bolivia": "Bolivia", + "Bosnia and Herzegovina": "Bosnia and Herzegovina", + "Botswana": "Botswana", + "Brazil": "Brazil", + "Brunei Darussalam": "Brunei", + "Bulgaria": "Bulgaria", + "Burkina Faso": "Burkina Faso", + "Burundi": "Burundi", + "Cabo Verde": "Cape Verde", + "Cambodia": "Cambodia", + "Cameroon": "Cameroon", + "Canada": "Canada", + "Central African Republic": "Central African Republic", + "Chad": "Chad", + "Chile": "Chile", + "China": "China", + "Colombia": "Colombia", + "Comoros": "Comoros", + "Costa Rica": "Costa Rica", + "Croatia": "Croatia", + "Cyprus": "Cyprus", + "Czech Republic": "Czechia", + "C\u00f4te d'Ivoire": "Cote d'Ivoire", + "Democratic Republic of the Congo": "Democratic Republic of Congo", + "Denmark": "Denmark", + "Djibouti": "Djibouti", + "Dominica": "Dominica", + "Dominican Republic": "Dominican Republic", + "Ecuador": "Ecuador", + "Egypt": "Egypt", + "El Salvador": "El Salvador", + "Equatorial Guinea": "Equatorial Guinea", + 
"Eritrea": "Eritrea", + "Estonia": "Estonia", + "Eswatini": "Eswatini", + "Ethiopia": "Ethiopia", + "Fiji": "Fiji", + "Finland": "Finland", + "France": "France", + "Gabon": "Gabon", + "Georgia": "Georgia", + "Germany": "Germany", + "Ghana": "Ghana", + "Greece": "Greece", + "Grenada": "Grenada", + "Guatemala": "Guatemala", + "Guinea": "Guinea", + "Guinea-Bissau": "Guinea-Bissau", + "Guyana": "Guyana", + "Haiti": "Haiti", + "Honduras": "Honduras", + "Hong Kong SAR": "Hong Kong", + "Hungary": "Hungary", + "Iceland": "Iceland", + "India": "India", + "Indonesia": "Indonesia", + "Iraq": "Iraq", + "Ireland": "Ireland", + "Israel": "Israel", + "Italy": "Italy", + "Jamaica": "Jamaica", + "Japan": "Japan", + "Jordan": "Jordan", + "Kazakhstan": "Kazakhstan", + "Kenya": "Kenya", + "Kiribati": "Kiribati", + "Kosovo": "Kosovo", + "Kuwait": "Kuwait", + "Kyrgyz Republic": "Kyrgyzstan", + "Latvia": "Latvia", + "Lebanon": "Lebanon", + "Lesotho": "Lesotho", + "Liberia": "Liberia", + "Libya": "Libya", + "Lithuania": "Lithuania", + "Luxembourg": "Luxembourg", + "Madagascar": "Madagascar", + "Malawi": "Malawi", + "Malaysia": "Malaysia", + "Maldives": "Maldives", + "Mali": "Mali", + "Malta": "Malta", + "Marshall Islands": "Marshall Islands", + "Mauritania": "Mauritania", + "Mauritius": "Mauritius", + "Mexico": "Mexico", + "Micronesia": "Micronesia (country)", + "Moldova": "Moldova", + "Mongolia": "Mongolia", + "Montenegro": "Montenegro", + "Morocco": "Morocco", + "Mozambique": "Mozambique", + "Myanmar": "Myanmar", + "Namibia": "Namibia", + "Nauru": "Nauru", + "Nepal": "Nepal", + "Netherlands": "Netherlands", + "New Zealand": "New Zealand", + "Nicaragua": "Nicaragua", + "Niger": "Niger", + "Nigeria": "Nigeria", + "North Macedonia": "North Macedonia", + "Norway": "Norway", + "Oman": "Oman", + "Pakistan": "Pakistan", + "Palau": "Palau", + "Panama": "Panama", + "Papua New Guinea": "Papua New Guinea", + "Paraguay": "Paraguay", + "Peru": "Peru", + "Philippines": "Philippines", + "Poland": 
"Poland", + "Portugal": "Portugal", + "Puerto Rico": "Puerto Rico", + "Qatar": "Qatar", + "Republic of Congo": "Congo", + "Romania": "Romania", + "Russia": "Russia", + "Rwanda": "Rwanda", + "Samoa": "Samoa", + "San Marino": "San Marino", + "Saudi Arabia": "Saudi Arabia", + "Senegal": "Senegal", + "Serbia": "Serbia", + "Seychelles": "Seychelles", + "Sierra Leone": "Sierra Leone", + "Singapore": "Singapore", + "Slovak Republic": "Slovakia", + "Slovenia": "Slovenia", + "Solomon Islands": "Solomon Islands", + "Somalia": "Somalia", + "South Africa": "South Africa", + "South Sudan": "South Sudan", + "Spain": "Spain", + "Sri Lanka": "Sri Lanka", + "St. Kitts and Nevis": "Saint Kitts and Nevis", + "St. Lucia": "Saint Lucia", + "St. Vincent and the Grenadines": "Saint Vincent and the Grenadines", + "Sudan": "Sudan", + "Suriname": "Suriname", + "Sweden": "Sweden", + "Switzerland": "Switzerland", + "Syria": "Syria", + "Tajikistan": "Tajikistan", + "Tanzania": "Tanzania", + "Thailand": "Thailand", + "The Bahamas": "Bahamas", + "Timor-Leste": "East Timor", + "Togo": "Togo", + "Tonga": "Tonga", + "Trinidad and Tobago": "Trinidad and Tobago", + "Tunisia": "Tunisia", + "Turkmenistan": "Turkmenistan", + "Tuvalu": "Tuvalu", + "Uganda": "Uganda", + "Ukraine": "Ukraine", + "United Arab Emirates": "United Arab Emirates", + "United Kingdom": "United Kingdom", + "United States": "United States", + "Uruguay": "Uruguay", + "Uzbekistan": "Uzbekistan", + "Vanuatu": "Vanuatu", + "Venezuela": "Venezuela", + "Vietnam": "Vietnam", + "West Bank and Gaza": "Palestine", + "Yemen": "Yemen", + "Zambia": "Zambia", + "Zimbabwe": "Zimbabwe", + "Islamic Republic of Iran": "Iran", + "Korea": "South Korea", + "Lao P.D.R.": "Laos", + "Macao SAR": "Macao", + "S\u00e3o Tom\u00e9 and Pr\u00edncipe": "Sao Tome and Principe", + "Taiwan Province of China": "Taiwan", + "The Gambia": "Gambia", + "T\u00fcrkiye": "Turkey" +} \ No newline at end of file diff --git 
a/etl/steps/data/garden/imf/2024-11-25/world_economic_outlook.meta.yml b/etl/steps/data/garden/imf/2024-11-25/world_economic_outlook.meta.yml new file mode 100644 index 00000000000..a14cc9b7857 --- /dev/null +++ b/etl/steps/data/garden/imf/2024-11-25/world_economic_outlook.meta.yml @@ -0,0 +1,58 @@ +definitions: + common: + presentation: + topic_tags: + - Economic Growth + processing_level: minor + +dataset: + update_period_days: 183 + +tables: + world_economic_outlook: + variables: + # GDP growth + gross_domestic_product__constant_prices__percent_change_observation: + title: Gross domestic product, constant prices - Percent change - Observations + unit: "%" + short_unit: "%" + description_short: "Annual percent change in gross domestic product. This data is adjusted for inflation." + description_from_producer: "Gross domestic product, constant prices. Percent change. Annual percentages of constant price GDP are year-on-year changes; the base year is country-specific. Expenditure-based GDP is total final expenditures at purchasers' prices (including the f.o.b. value of exports of goods and services), less the f.o.b. value of imports of goods and services. [SNA 1993]" + display: + numDecimalPlaces: 1 + tableDisplay: + hideRelativeChange: True + + gross_domestic_product__constant_prices__percent_change_forecast: + title: Gross domestic product, constant prices - Percent change - Forecasts + unit: "%" + short_unit: "%" + description_short: "Near-term projections of the annual percent change in gross domestic product. This data is adjusted for inflation." + description_from_producer: "Gross domestic product, constant prices. Percent change. Annual percentages of constant price GDP are year-on-year changes; the base year is country-specific. Expenditure-based GDP is total final expenditures at purchasers' prices (including the f.o.b. value of exports of goods and services), less the f.o.b. value of imports of goods and services. 
[SNA 1993]" + display: + numDecimalPlaces: 1 + tableDisplay: + hideRelativeChange: True + + # Unemployment rate + unemployment_rate__percent_of_total_labor_force_observation: + title: Unemployment rate - Percent of total labor force - Observations + unit: "%" + short_unit: "%" + description_short: "Unemployment refers to the share of the labor force that is without work but available for and seeking employment." + description_from_producer: "Unemployment rate can be defined by either the national definition, the ILO harmonized definition, or the OECD harmonized definition. The OECD harmonized unemployment rate gives the number of unemployed persons as a percentage of the labor force (the total number of people employed plus unemployed). [OECD Main Economic Indicators, OECD, monthly] As defined by the International Labour Organization, unemployed workers are those who are currently not working but are willing and able to work for pay, currently available to work, and have actively searched for work. [ILO, http://www.ilo.org/public/english/bureau/stat/res/index.htm]" + display: + numDecimalPlaces: 1 + tableDisplay: + hideRelativeChange: True + + unemployment_rate__percent_of_total_labor_force_forecast: + title: Unemployment rate - Percent of total labor force - Forecasts + unit: "%" + short_unit: "%" + description_short: "Near-term projections. Unemployment refers to the share of the labor force that is without work but available for and seeking employment." + description_from_producer: "Unemployment rate can be defined by either the national definition, the ILO harmonized definition, or the OECD harmonized definition. The OECD harmonized unemployment rate gives the number of unemployed persons as a percentage of the labor force (the total number of people employed plus unemployed). 
[OECD Main Economic Indicators, OECD, monthly] As defined by the International Labour Organization, unemployed workers are those who are currently not working but are willing and able to work for pay, currently available to work, and have actively searched for work. [ILO, http://www.ilo.org/public/english/bureau/stat/res/index.htm]" + display: + numDecimalPlaces: 1 + tableDisplay: + hideRelativeChange: True \ No newline at end of file diff --git a/etl/steps/data/garden/imf/2024-11-25/world_economic_outlook.py b/etl/steps/data/garden/imf/2024-11-25/world_economic_outlook.py new file mode 100644 index 00000000000..a528b98f01c --- /dev/null +++ b/etl/steps/data/garden/imf/2024-11-25/world_economic_outlook.py @@ -0,0 +1,38 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("world_economic_outlook") + + # Read table from meadow dataset. + tb = ds_meadow.read("world_economic_outlook") + + # + # Process data. + # + tb = geo.harmonize_countries( + df=tb, + countries_file=paths.country_mapping_path, + ) + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. 
+ ds_garden.save() diff --git a/etl/steps/data/grapher/imf/2024-11-25/world_economic_outlook.py b/etl/steps/data/grapher/imf/2024-11-25/world_economic_outlook.py new file mode 100644 index 00000000000..766fc129725 --- /dev/null +++ b/etl/steps/data/grapher/imf/2024-11-25/world_economic_outlook.py @@ -0,0 +1,50 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("world_economic_outlook") + + # Read table from garden dataset. + tb = ds_garden.read("world_economic_outlook", reset_index=False) + + # For Grapher charts, we want the dashed projection line to start at the last observation so + # that the line looks continuous. For this, we take each variable's last observation per country + # and make it its first forecast as well. + indicators = tb.columns.str.replace("_observation|_forecast", "", regex=True).unique().tolist() + tb = tb.reset_index() + + for ind in indicators: + # Find the last observation year by country + last_obs = tb.loc[tb[f"{ind}_observation"].notnull()].groupby("country")["year"].max() + + # Assign that to last_obs column + tb["last_obs"] = tb["country"].map(last_obs) + + # Where the year is the last_obs year, assign the value of the last observation + tb.loc[tb["year"] == tb["last_obs"], f"{ind}_forecast"] = tb[f"{ind}_observation"] + + # Drop last_obs + tb = tb.drop(columns="last_obs") + + # Reinstate the index + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. 
+ ds_grapher.save() diff --git a/etl/steps/data/meadow/imf/2024-11-25/world_economic_outlook.py b/etl/steps/data/meadow/imf/2024-11-25/world_economic_outlook.py new file mode 100644 index 00000000000..b534462322e --- /dev/null +++ b/etl/steps/data/meadow/imf/2024-11-25/world_economic_outlook.py @@ -0,0 +1,122 @@ +"""Load a snapshot and create a meadow dataset.""" + +import pandas as pd +from owid.catalog import Table + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +VARIABLE_LIST = [ + "NGDP_RPCH", # Gross domestic product, constant prices / Percent change + "LUR", # Unemployment rate / Percent of total labor force +] + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("world_economic_outlook.xls") + + # Load data from snapshot (despite the ".xls" extension, it is parsed as tab-separated UTF-16-LE text). + tb = snap.read_csv(delimiter="\t", encoding="utf-16-le") + + # + # Process data. + # + tb = select_data(tb) + tb = make_variable_names(tb) + tb = pick_variables(tb) + tb = reshape_and_clean(tb) + + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset(dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=snap.metadata) + + # Save changes in the new meadow dataset. + ds_meadow.save() + + +def select_data(tb: Table) -> Table: + """ + Selects the data we want to import from the raw table + """ + + tb = tb.drop( + columns=[ + "WEO Country Code", + "ISO", + "Country/Series-specific Notes", + "Subject Notes", + "Scale", + ] + ).dropna(subset=["Country"]) + + return tb + + +def make_variable_names(tb: Table) -> Table: + """ + Creates a variable name from the Subject Descriptor and Units columns.
+ """ + + tb["variable"] = tb["Subject Descriptor"] + " - " + tb["Units"] + tb = tb.drop(columns=["Subject Descriptor", "Units"]) + + return tb + + +def pick_variables(tb: Table) -> Table: + """ + Selects the variables we want to import from the raw table. + """ + + # Select only the variables we want to import. + tb = tb[tb["WEO Subject Code"].isin(VARIABLE_LIST)].reset_index(drop=True) + + # Drop WEO Subject Code + tb = tb.drop(columns="WEO Subject Code") + + return tb + + +def reshape_and_clean(tb: Table) -> Table: + """ + Reshapes the table from wide to long format and cleans the data. + """ + + # Drop any column with "Unnamed" in the name. + tb = tb.drop(columns=tb.columns[tb.columns.str.contains("Unnamed")]) + + tb = tb.melt(id_vars=["Country", "variable", "Estimates Start After"], var_name="year") + + # Coerce values to numeric. + tb["value"] = tb["value"].replace("--", pd.NA).astype("Float64") + tb["year"] = tb["year"].astype("Int64") + + # Split between observations and forecasts + tb.loc[tb.year > tb["Estimates Start After"], "variable"] += "_forecast" + tb.loc[tb.year <= tb["Estimates Start After"], "variable"] += "_observation" + + # Drop rows with missing values. + tb = tb.dropna(subset=["value"]) + + # Drop Estimates Start After + tb = tb.drop(columns="Estimates Start After") + + tb = tb.pivot( + index=["Country", "year"], + columns="variable", + values="value", + join_column_levels_with="_", + ) + + return tb diff --git a/snapshots/imf/2024-11-25/world_economic_outlook.py b/snapshots/imf/2024-11-25/world_economic_outlook.py new file mode 100644 index 00000000000..4160c43e799 --- /dev/null +++ b/snapshots/imf/2024-11-25/world_economic_outlook.py @@ -0,0 +1,37 @@ +""" +Script to create a snapshot of dataset. + +The IMF doesn't allow automatic download of the dataset, so we need to manually download the dataset from the IMF website. + 1. Visit https://www.imf.org/en/Publications/SPROLLS/world-economic-outlook-databases + 2. 
Select the latest version of the data. + 3. Select "Entire dataset" + 4. Select "By Countries" to download the file. + 5. Save the file to this folder. + 6. Run this command on the terminal: + python snapshots/imf/{version}/world_economic_outlook.py --path-to-file <path-to-file> + 7. Delete the file from the folder. +""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +@click.option("--path-to-file", prompt=True, type=str, help="Path to local data file.") +def main(path_to_file: str, upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"imf/{SNAPSHOT_VERSION}/world_economic_outlook.xls") + + # Copy local data file to snapshots data folder, add file to DVC and upload to S3. + snap.create_snapshot(filename=path_to_file, upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/imf/2024-11-25/world_economic_outlook.xls.dvc b/snapshots/imf/2024-11-25/world_economic_outlook.xls.dvc new file mode 100644 index 00000000000..c8d796e1d8b --- /dev/null +++ b/snapshots/imf/2024-11-25/world_economic_outlook.xls.dvc @@ -0,0 +1,31 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: World Economic Outlook (WEO) + description: |- + The World Economic Outlook (WEO) database contains selected macroeconomic data series from the statistical appendix of the report of the same name, which presents the IMF staff's analysis and projections of economic developments at the global level, in major country groups, and many individual countries. + date_published: "2024-10-22" + version_producer: October 2024 + + # Citation + producer: International Monetary Fund + citation_full: |- + IMF. 2024. World Economic Outlook, October 2024.
Washington, DC: International Monetary Fund. ©IMF. https://doi.org/10.5089/9798400281150.081 + attribution_short: IMF + + # Files + url_main: https://www.imf.org/en/Publications/WEO/weo-database/2024/October + url_download: https://www.imf.org/-/media/Files/Publications/WEO/WEO-Database/2024/October/WEOOct2024all.ashx + date_accessed: 2024-11-25 + + # License + license: + name: IMF Copyright and Usage + url: https://www.imf.org/en/About/copyright-and-terms + +outs: + - md5: 2b56b3c547bd689518188f64d739e8d5 + size: 20297276 + path: world_economic_outlook.xls