diff --git a/dag/archive/poverty_inequality.yml b/dag/archive/poverty_inequality.yml index d35c56ca854..e2229fd573f 100644 --- a/dag/archive/poverty_inequality.yml +++ b/dag/archive/poverty_inequality.yml @@ -35,3 +35,11 @@ steps: - data://garden/wb/2024-03-27/world_bank_pip data://grapher/wb/2024-03-27/world_bank_pip_2017ppp: - data://garden/wb/2024-03-27/world_bank_pip + + # Multidimensional Poverty Index + data://meadow/ophi/2023-07-05/multidimensional_poverty_index: + - snapshot://ophi/2023-07-05/multidimensional_poverty_index.csv + data://garden/ophi/2023-07-05/multidimensional_poverty_index: + - data://meadow/ophi/2023-07-05/multidimensional_poverty_index + data://grapher/ophi/2023-07-05/multidimensional_poverty_index: + - data://garden/ophi/2023-07-05/multidimensional_poverty_index diff --git a/dag/poverty_inequality.yml b/dag/poverty_inequality.yml index aa961d1ba89..0e8c94291c3 100644 --- a/dag/poverty_inequality.yml +++ b/dag/poverty_inequality.yml @@ -60,13 +60,14 @@ steps: data://explorers/lis/latest/luxembourg_income_study: - data://garden/lis/2024-06-13/luxembourg_income_study - # Multidimensional Poverty Index - data://meadow/ophi/2023-07-05/multidimensional_poverty_index: - - snapshot://ophi/2023-07-05/multidimensional_poverty_index.csv - data://garden/ophi/2023-07-05/multidimensional_poverty_index: - - data://meadow/ophi/2023-07-05/multidimensional_poverty_index - data://grapher/ophi/2023-07-05/multidimensional_poverty_index: - - data://garden/ophi/2023-07-05/multidimensional_poverty_index + # Global Multidimensional Poverty Index + data://meadow/ophi/2024-10-28/multidimensional_poverty_index: + - snapshot://ophi/2024-10-28/multidimensional_poverty_index_cme.csv + - snapshot://ophi/2024-10-28/multidimensional_poverty_index_hot.csv + data://garden/ophi/2024-10-28/multidimensional_poverty_index: + - data://meadow/ophi/2024-10-28/multidimensional_poverty_index + data://grapher/ophi/2024-10-28/multidimensional_poverty_index: + - 
data://garden/ophi/2024-10-28/multidimensional_poverty_index # # OECD Income Distribution Database data://meadow/oecd/2024-04-10/income_distribution_database: diff --git a/etl/grapher_io.py b/etl/grapher_io.py index 5f167805f7d..57db8d900a7 100644 --- a/etl/grapher_io.py +++ b/etl/grapher_io.py @@ -510,19 +510,31 @@ def variable_data_table_from_catalog( # {'name': 'gender', 'value': 'all'}, # {'name': 'age_group', 'value': '15-29'} # ] - filters = variables[0].dimensions["filters"] - dim_names = [f["name"] for f in filters] + # Collect unique dimension names across all variables' filters. + dim_names = [] + for variable in variables: + if "filters" in variable.dimensions: # type: ignore + for f in variable.dimensions["filters"]: # type: ignore + if f["name"] not in dim_names: + dim_names.append(f["name"]) + + # Pivot the table using the generalized dimension names. tb_pivoted = tb.pivot(index=["country", "year"], columns=dim_names) + # Generate labels with exact structure matching tb_pivoted.columns. labels = [] for variable in variables: assert variable.dimensions, f"Variable {variable.id} has no dimensions" - labels.append( - tuple( - [variable.dimensions["originalShortName"]] - + [f["value"] for f in variable.dimensions["filters"]] + label_tuple = [variable.dimensions["originalShortName"]] + + # Add filter values in the same order as dim_names. 
+ for name in dim_names: + value = next( + (f["value"] for f in variable.dimensions.get("filters", []) if f["name"] == name), None ) - ) + label_tuple.append(value) # type: ignore + + labels.append(tuple(label_tuple)) tb = tb_pivoted.loc[:, labels] diff --git a/etl/steps/data/garden/ophi/2024-10-28/multidimensional_poverty_index.countries.json b/etl/steps/data/garden/ophi/2024-10-28/multidimensional_poverty_index.countries.json new file mode 100644 index 00000000000..602314eb086 --- /dev/null +++ b/etl/steps/data/garden/ophi/2024-10-28/multidimensional_poverty_index.countries.json @@ -0,0 +1,114 @@ +{ + "Afghanistan": "Afghanistan", + "Albania": "Albania", + "Algeria": "Algeria", + "Armenia": "Armenia", + "Bangladesh": "Bangladesh", + "Belize": "Belize", + "Benin": "Benin", + "Bolivia": "Bolivia", + "Bosnia and Herzegovina": "Bosnia and Herzegovina", + "Burkina Faso": "Burkina Faso", + "Burundi": "Burundi", + "Cambodia": "Cambodia", + "Cameroon": "Cameroon", + "Central African Republic": "Central African Republic", + "Chad": "Chad", + "China": "China", + "Colombia": "Colombia", + "Comoros": "Comoros", + "Congo": "Congo", + "Cote d'Ivoire": "Cote d'Ivoire", + "Dominican Republic": "Dominican Republic", + "Ecuador": "Ecuador", + "Egypt": "Egypt", + "Eswatini": "Eswatini", + "Ethiopia": "Ethiopia", + "Gabon": "Gabon", + "Gambia": "Gambia", + "Ghana": "Ghana", + "Guinea": "Guinea", + "Guinea-Bissau": "Guinea-Bissau", + "Guyana": "Guyana", + "Haiti": "Haiti", + "Honduras": "Honduras", + "India": "India", + "Indonesia": "Indonesia", + "Iraq": "Iraq", + "Jamaica": "Jamaica", + "Jordan": "Jordan", + "Kazakhstan": "Kazakhstan", + "Kenya": "Kenya", + "Kyrgyzstan": "Kyrgyzstan", + "Lao PDR": "Laos", + "Lesotho": "Lesotho", + "Liberia": "Liberia", + "Madagascar": "Madagascar", + "Malawi": "Malawi", + "Mali": "Mali", + "Mauritania": "Mauritania", + "Mexico": "Mexico", + "Moldova": "Moldova", + "Mongolia": "Mongolia", + "Montenegro": "Montenegro", + "Morocco": "Morocco", + 
"Mozambique": "Mozambique", + "Namibia": "Namibia", + "Nepal": "Nepal", + "Nicaragua": "Nicaragua", + "Niger": "Niger", + "Nigeria": "Nigeria", + "North Macedonia": "North Macedonia", + "Pakistan": "Pakistan", + "Palestine, State of": "Palestine", + "Peru": "Peru", + "Philippines": "Philippines", + "Rwanda": "Rwanda", + "Sao Tome and Principe": "Sao Tome and Principe", + "Senegal": "Senegal", + "Serbia": "Serbia", + "Sierra Leone": "Sierra Leone", + "Sudan": "Sudan", + "Suriname": "Suriname", + "Tajikistan": "Tajikistan", + "Tanzania": "Tanzania", + "Thailand": "Thailand", + "Timor-Leste": "East Timor", + "Togo": "Togo", + "Trinidad and Tobago": "Trinidad and Tobago", + "Tunisia": "Tunisia", + "Turkmenistan": "Turkmenistan", + "Uganda": "Uganda", + "Ukraine": "Ukraine", + "Viet Nam": "Vietnam", + "Yemen": "Yemen", + "Zambia": "Zambia", + "Zimbabwe": "Zimbabwe", + "Congo, Democratic Republic of the": "Democratic Republic of Congo", + "Angola": "Angola", + "Argentina": "Argentina", + "Barbados": "Barbados", + "Bhutan": "Bhutan", + "Botswana": "Botswana", + "Brazil": "Brazil", + "Costa Rica": "Costa Rica", + "Cuba": "Cuba", + "El Salvador": "El Salvador", + "Fiji": "Fiji", + "Georgia": "Georgia", + "Guatemala": "Guatemala", + "Kiribati": "Kiribati", + "Libya": "Libya", + "Maldives": "Maldives", + "Myanmar": "Myanmar", + "Papua New Guinea": "Papua New Guinea", + "Paraguay": "Paraguay", + "Saint Lucia": "Saint Lucia", + "Samoa": "Samoa", + "Seychelles": "Seychelles", + "South Africa": "South Africa", + "Sri Lanka": "Sri Lanka", + "Tonga": "Tonga", + "Tuvalu": "Tuvalu", + "Uzbekistan": "Uzbekistan" +} \ No newline at end of file diff --git a/etl/steps/data/garden/ophi/2024-10-28/multidimensional_poverty_index.meta.yml b/etl/steps/data/garden/ophi/2024-10-28/multidimensional_poverty_index.meta.yml new file mode 100644 index 00000000000..ea568b31d4b --- /dev/null +++ b/etl/steps/data/garden/ophi/2024-10-28/multidimensional_poverty_index.meta.yml @@ -0,0 +1,304 @@ +# NOTE: 
To learn more about the fields, hover over their names. +definitions: + common: + processing_level: minor + display: &common_display + tolerance: 12 + presentation: + topic_tags: + - Poverty + # Description from producer + description_from_producer: >- + The global MPI is a measure of acute poverty covering over 100 countries in the developing + regions of the world. This measure is based on the dual-cutoff counting approach to poverty + developed by Alkire and Foster (2011). The global MPI was developed in 2010 by Alkire and + Santos (2014, 2010) in collaboration with the UNDP’s Human Development Report Office + (HDRO). Since its inception, the global MPI has used information from 10 indicators, which are + grouped into three equally weighted dimensions: health, education, and living standards. + These dimensions are the same as those used in the UNDP’s Human Development Index. + + + In 2018, the first major revision of the global MPI was undertaken, considering improvements in + survey microdata and better align to the 2030 development agenda insofar as possible (Alkire and + Jahan, 2018; OPHI, 2018). The revision consisted of adjustments in the definition of five out of + the ten indicators, namely child mortality, nutrition, years of schooling, housing and assets. Alkire, + Kanagaratnam, Nogales and Suppa (2022) provide a comprehensive analysis of the consequences + of the 2018 revision. The normative and empirical decisions that underlie the revision of the global + MPI, and adjustments related to the child mortality, nutrition, years of schooling and housing + indicators are discussed in Alkire and Kanagaratnam (2021). The revision of assets indicator is + detailed in Vollmer and Alkire (2022). + + + The global MPI begins by establishing a deprivation profile for each person, showing which of the + 10 indicators they are deprived in. Each person is identified as deprived or non-deprived in each + indicator based on a deprivation cutoff. 
In the case of health and education, each + household member may be identified as deprived or not deprived according to available + information for other household members. For example, if any household member for whom data + exist is undernourished, each person in that household is considered deprived in nutrition. Taking + this approach – which was required by the data – does not reveal intrahousehold disparities, but + is intuitive and assumes shared positive (or negative) effects of achieving (or not achieving) certain + outcomes. Next, looking across indicators, each person’s deprivation score is constructed by + adding up the weights of the indicators in which they are deprived. The indicators use a nested + weight structure: equal weights across dimensions and an equal weight for each indicator within a + dimension. The normalised indicator weight structure of the global MPI means that the living + standard indicators receive lower weight than health and education related indicators because from + a policy perspective, each of the three dimensions is of roughly equal normative importance. + + + In the global MPI, a person is identified as multidimensionally poor or MPI poor if they are + deprived in at least one-third of the weighted MPI indicators. In other words, a person is MPI + poor if the person’s deprivation score is equal to or higher than the poverty cutoff of 33.33 percent. + After the poverty identification step, we aggregate across individuals to obtain the incidence of + poverty or headcount ratio (H) which represents the percentage of poor people in the population. + We then compute the intensity of poverty (A), representing the average percentage of weighted + deprivations experienced by the poor. We then compute the adjusted poverty headcount ratio (M0) + or MPI by combining H and A in a multiplicative form (MPI = H x A). 
+ + + Both the incidence and the intensity of these deprivations are highly relevant pieces of information + for poverty measurement. The incidence of poverty is intuitive and understandable by anyone. + People always want to know how many poor people there are in a society as a proportion of the + whole population. Media tend to pick up on the incidence of poverty easily. Yet, the proportion + of poor people as the headline figure is not enough (Alkire, Oldiges and Kanagaratnam, 2021). + + + A headcount ratio is also estimated using two other poverty cutoffs. The global MPI identifies + individuals as vulnerable to poverty if they are close to the one-third threshold, that is, if they are + deprived in 20 to 33.32 percent of weighted indicators. The tables also apply a higher poverty + cutoff to identify those in severe poverty, meaning those deprived in 50 percent or more of the + dimensions. + + + The AF methodology has a property that makes the global MPI even more useful—dimensional + breakdown. This property makes it possible to consistently compute the percentage of the + population who are multidimensionally poor and simultaneously deprived in each indicator. This + is known as the censored headcount ratio of an indicator. The weighted sum of censored + headcount ratios of all MPI indicators is equal to the MPI value. + + + The censored headcount ratio shows the extent of deprivations among the poor but does not + reflect the weights or relative values of the indicators. Two indicators may have the same censored + headcount ratios but different contributions to overall poverty, because the contribution depends + both on the censored headcount ratio and on the weight assigned to each indicator. As such, a + complementary analysis to the censored headcount ratio is the percentage contribution of each + indicator to overall multidimensional poverty. 
+ + # For description_short + description_short_multidimensional_poverty: Multidimensional poverty is defined as being deprived in a range of health, education and living standards indicators. + description_short_mpi: The MPI is a measure that reflects both the prevalence and the intensity of multidimensional poverty. + description_short_headcount_ratio: This is the share of the population that is multidimensionally poor. + description_short_intensity: The intensity is the share of indicators in which the multidimensionally poor are deprived on average. + description_short_vulnerable: This is the share of the population that is close to the multidimensional poverty threshold. + description_short_severe: This is the share of the population that is in severe multidimensional poverty. + description_short_censored_headcount_ratio: This is the share of the multidimensionally poor population deprived in the indicator <>. + description_short_uncensored_headcount_ratio: This is the share of the population deprived in the indicator <>. + description_short_area: |- + <% if area == "Urban" %> + This indicator is calculated for urban areas. + <% elif area == "Rural" %> + This indicator is calculated for rural areas. + <% elif area == "Camp" %> + This indicator is calculated for populations within refugee camps (available only for Palestine). + <%- endif -%> + + # For description_key + description_key_multidimensional_poverty_complement: |- + grouped into three dimensions: **health** (using two indicators: nutrition, child mortality), **education** (using two indicators: years of schooling, school attendance) and **living standards** (using six indicators: cooking fuel, sanitation, drinking water, electricity, housing, assets). 
+ description_key_multidimensional_poverty: |- + Being _MPI poor_ means that a person is deprived in a third or more of ten indicators, {definitions.description_key_multidimensional_poverty_complement} + description_key_vulnerable: |- + Being _vulnerable_ to multidimensional poverty means that a person is deprived in 20-33.33% of ten indicators, {definitions.description_key_multidimensional_poverty_complement} + description_key_severe: |- + Being _severely_ multidimensionally poor means that a person is deprived in 50% or more of ten indicators, {definitions.description_key_multidimensional_poverty_complement} + description_key_assessments: |- + Households are assessed as being deprived in a given indicator if they do not meet a certain threshold for that indicator. For instance, a household is deprived in the _Years of schooling_ indicator if no household member has completed six years of schooling. A person is considered deprived in the _Cooking fuel_ indicator if they cook using solid fuel, such as dung, agricultural crops, wood, charcoal, or coal. The thresholds for each indicator are published by OPHI in their [methodological notes](https://ophi.org.uk/sites/default/files/2024-10/OPHI%20MPI%20Methodological%20Note%2058_2.pdf). + description_key_weights: |- + The individual indicators are not _weighted_ equally: When adding up the number of indicators in which a person is deprived, some count for more than others. Health and education indicators are given a weight of 1/6, while the indicators within the living standards dimension are given a weight of 1/18. This means that the three dimensions – health, education and living standards – have an equal weight in the total of one-third each. + description_key_missing_data: |- + If the household survey data being used is missing any of the 10 indicators, that indicator is dropped from the calculation. The weights are then adjusted so that each dimension continues to be given a weight of one-third. 
MPI poverty estimates are only calculated if at least one indicator in health and education dimensions is available, and if up to four indicators in the living standards dimension are available. + description_key_mpi_first: |- + The Multidimensional Poverty Index is obtained by multiplying two values: the share of people who are multidimensionally (MPI) poor and the intensity of multidimensional poverty among the MPI poor. A larger figure represents a higher level of poverty. + description_key_mpi_last: |- + The Multidimensional Poverty Index, being the product of the incidence and intensity of multidimensional poverty, reflects both. + description_key_intensity: |- + The intensity of multidimensional poverty is calculated as the average share of indicators in which those counted as MPI poor are deprived (using the same weights to calculate the average). This is an important complementary measure to the share of the population who are MPI poor (the _incidence_ of MPI poverty). + description_key_intensity_example: |- + An example given by the researchers who calculate the MPI data serves to illustrate this well: "Imagine two countries: in both, 30% of people are poor (incidence). Judged by this piece of information, these two countries are equally poor. However, imagine that in one of the two countries poor people are deprived —on average— in one-third of the dimensions, whereas in the other country, the poor are deprived —on average— in two-thirds. By combining the two pieces of information -the intensity of deprivations and the proportion of poor people- we know that these two countries are not equally poor, but rather that the second is poorer than the first because the intensity of poverty is higher." + description_key_flavor: |- + <% if flavor == "Current margin estimate" %> + This variable is a current margin estimate (CME), based on the most recent survey data. Look for the harmonized over time (HOT) estimate to see trends over time. 
+ <% elif flavor == "Harmonized over time" %> + This variable is a harmonized over time (HOT) estimate. This harmonization seeks to make two or more MPI estimations comparable by aligning the indicator definitions in each survey. Look for the current margin estimate (CME) to see the most recent survey data. + <%- endif -%> + description_key_indicator_start: |- + A person in a household is deprived in the indicator _<>_ if + description_key_indicator_end: |- + <% if indicator == "Nutrition" %> + any person under 70 years of age for whom there is nutritional information is undernourished. This indicator is part of the _health_ dimension. + <% elif indicator == "Child mortality" %> + a child under 18 has died in the household in the five-year period preceding the survey. This indicator is part of the _health_ dimension. + <% elif indicator == "Years of schooling" %> + no eligible household member has completed six years of schooling. This indicator is part of the _education_ dimension. + <% elif indicator == "School attendance" %> + any school-aged child is not attending school up to the age at which he/she would complete class 8. This indicator is part of the _education_ dimension. + <% elif indicator == "Cooking fuel" %> + a household cooks using solid fuel, such as dung, agricultural crop, shrubs, wood, charcoal, or coal. This indicator is part of the _living standards_ dimension. + <% elif indicator == "Sanitation" %> + the household has unimproved or no sanitation facility or it is improved but shared with other households. This indicator is part of the _living standards_ dimension. + <% elif indicator == "Drinking water" %> + the household’s source of drinking water is not safe or safe drinking water is a 30-minute or longer walk from home, roundtrip. This indicator is part of the _living standards_ dimension. + <% elif indicator == "Electricity" %> + the household has no electricity. This indicator is part of the _living standards_ dimension. 
+ <% elif indicator == "Housing" %> + the household has inadequate housing materials in any of the three components: floor, roof, or walls. This indicator is part of the _living standards_ dimension. + <% elif indicator == "Assets" %> + the household does not own more than one of these assets: radio, TV, telephone, computer, animal cart, bicycle, motorbike, or refrigerator, and does not own a car or truck. This indicator is part of the _living standards_ dimension. + <%- endif -%> + description_key_indicator: |- + {definitions.description_key_indicator_start} {definitions.description_key_indicator_end} + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + + +tables: + multidimensional_poverty_index: + variables: + mpi: + title: Multidimensional Poverty Index (MPI) (<>) - <> + unit: "" + short_unit: "" + description_short: "{definitions.description_short_multidimensional_poverty} {definitions.description_short_mpi}{definitions.description_short_area}" + description_key: + - "{definitions.description_key_mpi_first}" + - "{definitions.description_key_multidimensional_poverty}" + - "{definitions.description_key_assessments}" + - "{definitions.description_key_weights}" + - "{definitions.description_key_missing_data}" + - "{definitions.description_key_intensity}" + - "{definitions.description_key_intensity_example}" + - "{definitions.description_key_mpi_last}" + - "{definitions.description_key_flavor}" + presentation: + title_public: Multidimensional Poverty Index (MPI) + title_variant: <>, <> + display: + name: Multidimensional Poverty Index (MPI) + numDecimalPlaces: 3 + <<: *common_display + + headcount_ratio: + title: Share of the population in multidimensional poverty (<>) - <> + unit: "%" + short_unit: "%" + description_short: "{definitions.description_short_multidimensional_poverty} {definitions.description_short_headcount_ratio}{definitions.description_short_area}" + 
description_key: + - "{definitions.description_key_multidimensional_poverty}" + - "{definitions.description_key_assessments}" + - "{definitions.description_key_weights}" + - "{definitions.description_key_missing_data}" + - "{definitions.description_key_flavor}" + presentation: + title_public: Share of the population in multidimensional poverty + title_variant: <>, <> + display: + name: Share of the population in multidimensional poverty + numDecimalPlaces: 1 + <<: *common_display + + intensity: + title: Intensity of multidimensional poverty (<>) - <> + unit: "%" + short_unit: "%" + description_short: "{definitions.description_short_multidimensional_poverty} {definitions.description_short_intensity}{definitions.description_short_area}" + description_key: + - "{definitions.description_key_multidimensional_poverty}" + - "{definitions.description_key_assessments}" + - "{definitions.description_key_weights}" + - "{definitions.description_key_missing_data}" + - "{definitions.description_key_intensity}" + - "{definitions.description_key_intensity_example}" + - "{definitions.description_key_flavor}" + presentation: + title_public: Intensity of multidimensional poverty + title_variant: <>, <> + display: + name: Intensity of multidimensional poverty + numDecimalPlaces: 1 + <<: *common_display + + vulnerable: + title: Share of the population vulnerable to multidimensional poverty (<>) - <> + unit: "%" + short_unit: "%" + description_short: "{definitions.description_short_multidimensional_poverty} {definitions.description_short_vulnerable}{definitions.description_short_area}" + description_key: + - "{definitions.description_key_vulnerable}" + - "{definitions.description_key_assessments}" + - "{definitions.description_key_weights}" + - "{definitions.description_key_missing_data}" + - "{definitions.description_key_flavor}" + presentation: + title_public: Share of the population vulnerable to multidimensional poverty + title_variant: <>, <> + display: + name: Share of the 
population vulnerable to multidimensional poverty + numDecimalPlaces: 1 + <<: *common_display + + severe: + title: Share of the population in severe multidimensional poverty (<>) - <> + unit: "%" + short_unit: "%" + description_short: "{definitions.description_short_multidimensional_poverty} {definitions.description_short_severe}{definitions.description_short_area}" + description_key: + - "{definitions.description_key_severe}" + - "{definitions.description_key_assessments}" + - "{definitions.description_key_weights}" + - "{definitions.description_key_missing_data}" + - "{definitions.description_key_flavor}" + presentation: + title_public: Share of the population in severe multidimensional poverty + title_variant: <>, <> + display: + name: Share of the population in severe multidimensional poverty + numDecimalPlaces: 1 + <<: *common_display + + censored_headcount_ratio: + title: Share of the population in multidimensional poverty deprived in the indicator <> (<>) - <> + unit: "%" + short_unit: "%" + description_short: "{definitions.description_short_multidimensional_poverty} {definitions.description_short_censored_headcount_ratio}{definitions.description_short_area}" + description_key: + - "{definitions.description_key_multidimensional_poverty}" + - "{definitions.description_key_indicator}" + - "{definitions.description_key_flavor}" + presentation: + title_public: Share of the population in multidimensional poverty deprived in the indicator <> + title_variant: <>, <> + display: + name: Share of the population in multidimensional poverty deprived in the indicator <> + numDecimalPlaces: 1 + <<: *common_display + + uncensored_headcount_ratio: + title: Share of the population deprived in the indicator <> (<>) - <> + unit: "%" + short_unit: "%" + description_short: "{definitions.description_short_multidimensional_poverty} {definitions.description_short_uncensored_headcount_ratio}{definitions.description_short_area}" + description_key: + - 
"{definitions.description_key_multidimensional_poverty}" + - "{definitions.description_key_indicator}" + - "{definitions.description_key_flavor}" + presentation: + title_public: Share of the population deprived in the indicator <> + title_variant: <>, <> + display: + name: Share of the population deprived in the indicator <> + numDecimalPlaces: 1 + <<: *common_display + diff --git a/etl/steps/data/garden/ophi/2024-10-28/multidimensional_poverty_index.py b/etl/steps/data/garden/ophi/2024-10-28/multidimensional_poverty_index.py new file mode 100644 index 00000000000..10016506bed --- /dev/null +++ b/etl/steps/data/garden/ophi/2024-10-28/multidimensional_poverty_index.py @@ -0,0 +1,149 @@ +"""Load a meadow dataset and create a garden dataset.""" + +import owid.catalog.processing as pr +from owid.catalog import Table + +from etl.data_helpers import geo +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +MEASURE_NAMES = { + "A": "intensity", + "H": "headcount_ratio", + "M0": "mpi", + "hd": "uncensored_headcount_ratio", + "hdk": "censored_headcount_ratio", + "sev": "severe", + "vuln": "vulnerable", +} + +# Define categories to keep in each column +CATEGORIES_TO_KEEP = { + "loa": ["area", "nat"], + "measure": list(MEASURE_NAMES.keys()), +} + +# Define indicator categories +INDICATOR_NAMES = [ + "Assets", + "Child mortality", + "Cooking fuel", + "Drinking water", + "Electricity", + "Housing", + "Nutrition", + "Sanitation", + "School attendance", + "Years of schooling", +] + +# Define index column for the final table +INDEX_COLS = ["country", "year", "indicator", "area", "flavor"] + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("multidimensional_poverty_index") + + # Read table from meadow dataset. + tb_hot = ds_meadow["hot"].reset_index() + tb_cme = ds_meadow["cme"].reset_index() + + # + # Process data. 
+ # + tb_hot = geo.harmonize_countries( + df=tb_hot, + countries_file=paths.country_mapping_path, + warn_on_unused_countries=False, + ) + tb_cme = geo.harmonize_countries( + df=tb_cme, + countries_file=paths.country_mapping_path, + warn_on_unused_countries=False, + ) + + tb = make_tables_wide_and_merge(tb_cme=tb_cme, tb_hot=tb_hot) + + tb = tb.format(keys=INDEX_COLS, short_name="multidimensional_poverty_index") + + # + # Save outputs. + # + # Create a new garden dataset with the same metadata as the meadow dataset. + ds_garden = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata + ) + + # Save changes in the new garden dataset. + ds_garden.save() + + +def make_tables_wide_and_merge(tb_cme: Table, tb_hot: Table) -> Table: + """ + Make tables wide to separate indicators, rename categories and merge hot and cme tables + """ + + tb_cme = select_categories_and_rename(tb_cme) + tb_hot = select_categories_and_rename(tb_hot) + + # Make tables wide + tb_hot = tb_hot.pivot( + index=["country", "year", "indicator", "area"], + columns=["measure"], + values="b", + join_column_levels_with="_", + ).reset_index(drop=True) + + tb_cme = tb_cme.pivot( + index=["country", "year", "indicator", "area"], + columns=["measure"], + values="b", + join_column_levels_with="_", + ).reset_index(drop=True) + + # Add a flavor column to each table + tb_cme["flavor"] = "Current margin estimate" + tb_hot["flavor"] = "Harmonized over time" + + # Concatenate the two tables + tb = pr.concat([tb_cme, tb_hot], ignore_index=True) + + return tb + + +def select_categories_and_rename(tb: Table) -> Table: + """ + Select categories to keep and rename them + """ + + for col, categories in CATEGORIES_TO_KEEP.items(): + # Assert that all categories are in the column + assert set(categories).issubset( + set(tb[col].unique()) + ), f"Categories {set(categories) - set(tb[col].unique())} not in column {col}" + + # Filter categories + tb = 
tb[tb[col].isin(categories)].reset_index(drop=True) + + # Rename measure categories + tb["measure"] = tb["measure"].cat.rename_categories(MEASURE_NAMES) + + # Check that the column ind_lab contains all INDICATOR_NAMES + indicators_excluding_nan = tb[tb["ind_lab"].notna()]["ind_lab"].unique() + assert ( + set(indicators_excluding_nan) == set(INDICATOR_NAMES) + ), f"Column ind_lab is not identical to the expected list. These are the differences: {set(INDICATOR_NAMES) - set(indicators_excluding_nan)}" + + # Remove the original indicator column (ind_lab takes its name below) + tb = tb.drop(columns=["indicator"]) + + # Rename ind_lab as indicator and area_lab as area + tb = tb.rename(columns={"ind_lab": "indicator", "area_lab": "area"}) + + return tb diff --git a/etl/steps/data/grapher/ophi/2024-10-28/multidimensional_poverty_index.py b/etl/steps/data/grapher/ophi/2024-10-28/multidimensional_poverty_index.py new file mode 100644 index 00000000000..cdcd55d33ba --- /dev/null +++ b/etl/steps/data/grapher/ophi/2024-10-28/multidimensional_poverty_index.py @@ -0,0 +1,28 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("multidimensional_poverty_index") + + # Read table from garden dataset. + tb = ds_garden["multidimensional_poverty_index"] + + # + # Save outputs. + # + # Create a new grapher dataset with the same metadata as the garden dataset. + ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. 
+ ds_grapher.save() diff --git a/etl/steps/data/meadow/ophi/2024-10-28/multidimensional_poverty_index.py b/etl/steps/data/meadow/ophi/2024-10-28/multidimensional_poverty_index.py new file mode 100644 index 00000000000..67f94d5db3f --- /dev/null +++ b/etl/steps/data/meadow/ophi/2024-10-28/multidimensional_poverty_index.py @@ -0,0 +1,84 @@ +"""Load a snapshot and create a meadow dataset.""" + +from typing import List + +from owid.catalog import Table + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +# Define index columns for cme and hot tables. +INDEX_COLS = [ + "country", + "year", + "loa", + "measure", + "indicator", + "region_lab", + "area_lab", + "agec2_lab", + "agec4_lab", +] + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap_cme = paths.load_snapshot("multidimensional_poverty_index_cme.csv") + snap_hot = paths.load_snapshot("multidimensional_poverty_index_hot.csv") + + # Load data from snapshot. + tb_cme = snap_cme.read() + tb_hot = snap_hot.read() + + # + # Process data. + # + # Format columns and index. + tb_cme = format_columns_and_index(tb=tb_cme, short_name="cme", index_columns=INDEX_COLS) + tb_hot = format_columns_and_index(tb=tb_hot, short_name="hot", index_columns=INDEX_COLS) + + # + # Save outputs. + # + # Create a new meadow dataset with the same metadata as the snapshot. + ds_meadow = create_dataset( + dest_dir, tables=[tb_cme, tb_hot], check_variables_metadata=True, default_metadata=snap_cme.metadata + ) + + # Save changes in the new meadow dataset. + ds_meadow.save() + + +def format_columns_and_index(tb: Table, short_name: str, index_columns: List[str]) -> Table: + """ + Rename columns, format year and select the categories I need. + """ + # Rename columns.
+ tb = tb.rename(columns={"cty_lab": "country"}) + + # Make year string + tb["year"] = tb["year"].astype("string") + + # In the measure column, select all the categories, except for pctb + tb = tb[tb["measure"] != "pctb"].reset_index(drop=True) + + tb = tb[~tb["loa"].isin(["hship", "agec2", "agec4", "region"])].reset_index(drop=True) + + # NOTE: On years + # As the year data is encoded in a string variable between two years, we need to map the data to a single (integer) year. + # For now, arbitrarily, I take the first year in these cases and convert to integer. + + tb["year"] = tb["year"].str[:4].astype(int) + + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. + tb = tb.format( + index_columns, + short_name=short_name, + ) + + return tb diff --git a/snapshots/ophi/2024-10-28/multidimensional_poverty_index.py b/snapshots/ophi/2024-10-28/multidimensional_poverty_index.py new file mode 100644 index 00000000000..fb8dc8b1ec2 --- /dev/null +++ b/snapshots/ophi/2024-10-28/multidimensional_poverty_index.py @@ -0,0 +1,28 @@ +"""Script to create a snapshot of dataset.""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + +# Define snapshot variants +SNAPSHOT_VARIANTS = ["cme", "hot"] + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +def main(upload: bool) -> None: + for variant in SNAPSHOT_VARIANTS: + # Create a new snapshot. + snap = Snapshot(f"ophi/{SNAPSHOT_VERSION}/multidimensional_poverty_index_{variant}.csv") + + # Download data from source, add file to DVC and upload to S3. 
+ snap.create_snapshot(upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/ophi/2024-10-28/multidimensional_poverty_index_cme.csv.dvc b/snapshots/ophi/2024-10-28/multidimensional_poverty_index_cme.csv.dvc new file mode 100644 index 00000000000..6d7635f29bd --- /dev/null +++ b/snapshots/ophi/2024-10-28/multidimensional_poverty_index_cme.csv.dvc @@ -0,0 +1,43 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Global Multidimensional Poverty Index (MPI) + description: |- + The global Multidimensional Poverty Index (MPI) is an international measure of acute multidimensional poverty covering over 100 developing countries. It complements traditional monetary poverty measures by capturing the acute deprivations in health, education, and living standards that a person faces simultaneously. + + The MPI assesses poverty at the individual level. If a person is deprived in a third or more of ten (weighted) indicators, the global MPI identifies them as ‘MPI poor’. The extent – or intensity – of their poverty is also measured through the percentage of deprivations they are experiencing. + + The global MPI shows who is poor and how they are poor and can be used to create a comprehensive picture of people living in poverty. It permits comparisons both across countries and world regions, and within countries by ethnic group, urban/rural area, subnational region, and age group, as well as other key household and community characteristics. For each group and for countries as a whole, the composition of MPI by each of the ten indicators shows how people are poor. + + This makes the MPI and its linked information platform invaluable as an analytical tool to identify the most vulnerable people – the poorest among the poor, revealing poverty patterns within countries and over time, enabling policy makers to target resources and design policies more effectively. 
+ + The global MPI was developed by OPHI with the UN Development Programme (UNDP) for inclusion in UNDP’s flagship Human Development Report in 2010. It has been published annually by OPHI and in the HDRs ever since. + date_published: "2024-10-17" + version_producer: 2024 + title_snapshot: Global Multidimensional Poverty Index (MPI) - Current margin estimates (CME) + description_snapshot: |- + This dataset contains current margin estimates (CME), based on the most recent survey data. + + # Citation + producer: Alkire, Kanagaratnam and Suppa + citation_full: |- + - Alkire, S., Kanagaratnam, U., and Suppa, N. (2024). The Global Multidimensional Poverty Index (MPI) 2024. Country Results and Methodological Note. OPHI MPI Methodological Note 58, Oxford Poverty and Human Development Initiative, University of Oxford. + - Alkire, S., Kanagaratnam, U., and Suppa, N. (2024). The Global Multidimensional Poverty Index (MPI) 2024. Disaggregation Results and Methodological Note. OPHI MPI Methodological Note 59, Oxford Poverty and Human Development Initiative, University of Oxford. 
+ attribution: Alkire, Kanagaratnam and Suppa (2024) - The Global Multidimensional Poverty Index (MPI) 2024 + + # Files + url_main: https://ophi.org.uk/global-mpi + url_download: https://cloud-ophi.qeh.ox.ac.uk/index.php/s/eRLL5jGKPLTygYT/download?path=%2F&files=GMPI2024_puf.csv + date_accessed: 2024-10-28 + + # License + license: + name: CC BY 4.0 + url: https://ophi.org.uk/global-mpi-frequently-asked-questions + +outs: + - md5: b1513539648aa4f1a89b9a98cc3f4d91 + size: 19293090 + path: multidimensional_poverty_index_cme.csv diff --git a/snapshots/ophi/2024-10-28/multidimensional_poverty_index_hot.csv.dvc b/snapshots/ophi/2024-10-28/multidimensional_poverty_index_hot.csv.dvc new file mode 100644 index 00000000000..649c37f3bde --- /dev/null +++ b/snapshots/ophi/2024-10-28/multidimensional_poverty_index_hot.csv.dvc @@ -0,0 +1,43 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Global Multidimensional Poverty Index (MPI) + description: |- + The global Multidimensional Poverty Index (MPI) is an international measure of acute multidimensional poverty covering over 100 developing countries. It complements traditional monetary poverty measures by capturing the acute deprivations in health, education, and living standards that a person faces simultaneously. + + The MPI assesses poverty at the individual level. If a person is deprived in a third or more of ten (weighted) indicators, the global MPI identifies them as ‘MPI poor’. The extent – or intensity – of their poverty is also measured through the percentage of deprivations they are experiencing. + + The global MPI shows who is poor and how they are poor and can be used to create a comprehensive picture of people living in poverty. 
It permits comparisons both across countries and world regions, and within countries by ethnic group, urban/rural area, subnational region, and age group, as well as other key household and community characteristics. For each group and for countries as a whole, the composition of MPI by each of the ten indicators shows how people are poor. + + This makes the MPI and its linked information platform invaluable as an analytical tool to identify the most vulnerable people – the poorest among the poor, revealing poverty patterns within countries and over time, enabling policy makers to target resources and design policies more effectively. + + The global MPI was developed by OPHI with the UN Development Programme (UNDP) for inclusion in UNDP’s flagship Human Development Report in 2010. It has been published annually by OPHI and in the HDRs ever since. + date_published: "2024-10-17" + version_producer: 2024 + title_snapshot: Global Multidimensional Poverty Index (MPI) - Harmonized over time (HOT) + description_snapshot: |- + This dataset contains harmonized over time (HOT) estimates. This harmonization seeks to make two or more MPI estimations comparable by aligning the indicator definitions in each survey. + + # Citation + producer: Alkire, Kanagaratnam and Suppa + citation_full: |- + - Alkire, S., Kanagaratnam, U., and Suppa, N. (2024). A methodological note on the global Multidimensional Poverty Index (MPI) 2024 changes over time results for 86 countries. OPHI MPI Methodological Note 60, Oxford Poverty and Human Development Initiative, University of Oxford. + - Alkire, S., Kanagaratnam, U., and Suppa, N. (2024). The Global Multidimensional Poverty Index (MPI) 2024. Disaggregation Results and Methodological Note. OPHI MPI Methodological Note 59, Oxford Poverty and Human Development Initiative, University of Oxford. 
+ attribution: Alkire, Kanagaratnam and Suppa (2024) - The Global Multidimensional Poverty Index (MPI) 2024 + + # Files + url_main: https://ophi.org.uk/global-mpi + url_download: https://cloud-ophi.qeh.ox.ac.uk/index.php/s/eRLL5jGKPLTygYT/download?path=%2F&files=GMPI_HOT_2024_puf.csv + date_accessed: 2024-10-28 + + # License + license: + name: CC BY 4.0 + url: https://ophi.org.uk/global-mpi-frequently-asked-questions + +outs: + - md5: 7de477b10168ce0dfea0c48046c1b905 + size: 31115054 + path: multidimensional_poverty_index_hot.csv