diff --git a/etl/steps/data/garden/animal_welfare/2024-12-17/fur_laws.py b/etl/steps/data/garden/animal_welfare/2024-12-17/fur_laws.py index 696076f1d20..149537707db 100644 --- a/etl/steps/data/garden/animal_welfare/2024-12-17/fur_laws.py +++ b/etl/steps/data/garden/animal_welfare/2024-12-17/fur_laws.py @@ -27,6 +27,10 @@ # NOTE: This is a label used by us (which currently coincides with the one used in the original data). NO_DATA_LABEL = "NO DATA" +# Define label for having no active farms reported. +# This will be used in countries with no active farms, where there is either no ban, or no information about a ban. +NO_ACTIVE_FARMS_REPORTED_LABEL = "No active farms reported" + # Define status for having no bans, but having phased out fur farming due to stricter regulations. PHASE_OUT_DUE_TO_STRICTER_REGULATIONS = "Phased out due to stricter regulations" # Rename fur farming status: @@ -39,7 +43,9 @@ PHASE_OUT_DUE_TO_STRICTER_REGULATIONS: PHASE_OUT_DUE_TO_STRICTER_REGULATIONS, # As requested by the Fur-Free Alliance, we will replace "NO DATA" by "No active farms reported", since, in this case, there is no sign of fur farming for those countries. # NOTE: The same does not apply to fur trading. - NO_DATA_LABEL: "No active farms reported", + NO_ACTIVE_FARMS_REPORTED_LABEL: NO_ACTIVE_FARMS_REPORTED_LABEL, + # NOTE: Some countries (for now Argentina and Brazil) have no ban, but it's unclear if they have active farms, so they will be labeled as "NO DATA". + NO_DATA_LABEL: NO_DATA_LABEL, } # Define label for fur farming status that are not yet effective. BANNED_NOT_EFFECTIVE = "Banned (not yet effective)" @@ -82,13 +88,6 @@ def run(dest_dir: str) -> None: # Remove empty rows. tb = tb.dropna(how="all").reset_index(drop=True) - # The first row is expected to say "All other countries are expected to be fur-free". - # Therefore, add all other countries and assume they do not have any active farms. - # NOTE: We confirmed this assumption with Fur Free Alliance. 
- error = "Data has changed. Manually check this part of the code." - assert tb.loc[0, "country"] == "All other countries do not have active fur farms", error - tb = tb.drop(0).reset_index(drop=True) - # Harmonize country names. tb = geo.harmonize_countries(tb, countries_file=paths.country_mapping_path) @@ -99,7 +98,13 @@ def run(dest_dir: str) -> None: & (tb_regions["region_type"] == "country") & (tb_regions["defined_by"] == "owid") ][["name"]] - .assign(**{"fur_farms_active": "NO", "fur_farming_status": NO_DATA_LABEL, "fur_trading_status": NO_DATA_LABEL}) + .assign( + **{ + "fur_farms_active": "NO", + "fur_farming_status": NO_ACTIVE_FARMS_REPORTED_LABEL, + "fur_trading_status": NO_DATA_LABEL, + } + ) .rename(columns={"name": "country"}, errors="raise") ) tb = pr.concat([tb, tb_added], ignore_index=True) @@ -182,14 +187,24 @@ def prepare_fur_farming_ban_status(tb: Table) -> Table: # Drop unnecessary column. tb = tb.drop(columns=["phase_out_due_to_stricter_regulations"], errors="raise") + # There are countries (for now, Argentina and Brazil) that have no ban, but it's unclear if they have active farms (the column for operating fur farms says "NO DATA"). + # Ensure they appear as "NO DATA". + select_unclear = tb["fur_farms_active"] == NO_DATA_LABEL + tb.loc[select_unclear, "fur_farming_status"] = NO_DATA_LABEL + # Map all fur farming statuses. - # NOTE: The data is ambiguous. There is "NO", "YES", missing data, and missing country. - # For now, assume that missing data means "NO", and missing country means "NO DATA". tb["fur_farming_status"] = map_series( tb["fur_farming_status"], mapping=FUR_FARMING_BAN_STATUS, warn_on_missing_mappings=True, warn_on_unused_mappings=True, + show_full_warning=True, + ) + + # Some countries (for now, only Cyprus) have no ban, but no active farms. Also, Israel has no data on a ban, but no active farms. + # In both cases, the status should appear as no active farms reported. 
+ tb.loc[(tb["fur_farming_status"] == "Not banned") & (tb["fur_farms_active"] == "NO"), "fur_farming_status"] = ( + NO_ACTIVE_FARMS_REPORTED_LABEL ) # For those years years that are in the future, change the status. diff --git a/etl/steps/data/meadow/animal_welfare/2024-12-17/fur_laws.py b/etl/steps/data/meadow/animal_welfare/2024-12-17/fur_laws.py index 4dfa99c210e..f67e3fc6982 100644 --- a/etl/steps/data/meadow/animal_welfare/2024-12-17/fur_laws.py +++ b/etl/steps/data/meadow/animal_welfare/2024-12-17/fur_laws.py @@ -16,6 +16,13 @@ def run(dest_dir: str) -> None: # # Process data. # + # Remove the last row, which simply gives general additional information. + # It says that any country that is not listed in the spreadsheet can be assumed to have no active fur farms. + # We confirmed this assumption with Fur Free Alliance. + error = "Spreadsheet has changed. Manually check this part of the code." + assert tb.iloc[-1]["COUNTRY"] == "No active fur farms have been reported in all other countries", error + tb = tb[:-1] + # Ensure all columns are snake-case, set an appropriate index, and sort conveniently. # NOTE: Do not verify integrity, since there are duplicated countries (to be fixed in the garden step). tb = tb.format(keys=["country"], verify_integrity=False)