Skip to content

Commit

Permalink
Fix issues with Argentina, Brazil and Cyprus
Browse files Browse the repository at this point in the history
  • Loading branch information
pabloarosado committed Dec 22, 2024
1 parent af2c865 commit 0363c1f
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 11 deletions.
37 changes: 26 additions & 11 deletions etl/steps/data/garden/animal_welfare/2024-12-17/fur_laws.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
# NOTE: This is a label used by us (which currently coincides with the one used in the original data).
NO_DATA_LABEL = "NO DATA"

# Define label for having no active farms reported.
# This will be used in countries with no active farms, where there is either no ban, or no information about a ban.
NO_ACTIVE_FARMS_REPORTED_LABEL = "No active farms reported"

# Define status for having no bans, but having phased out fur farming due to stricter regulations.
PHASE_OUT_DUE_TO_STRICTER_REGULATIONS = "Phased out due to stricter regulations"
# Rename fur farming status:
Expand All @@ -39,7 +43,9 @@
PHASE_OUT_DUE_TO_STRICTER_REGULATIONS: PHASE_OUT_DUE_TO_STRICTER_REGULATIONS,
# As requested by the Fur-Free Alliance, we will replace "NO DATA" by "No active farms reported", since, in this case, there is no sign of fur farming for those countries.
# NOTE: The same does not apply to fur trading.
NO_DATA_LABEL: "No active farms reported",
NO_ACTIVE_FARMS_REPORTED_LABEL: NO_ACTIVE_FARMS_REPORTED_LABEL,
# NOTE: Some countries (for now Argentina and Brazil) have no ban, but it's unclear if they have active farms, so they will be labeled as "NO DATA".
NO_DATA_LABEL: NO_DATA_LABEL,
}
# Define label for fur farming status that are not yet effective.
BANNED_NOT_EFFECTIVE = "Banned (not yet effective)"
Expand Down Expand Up @@ -82,13 +88,6 @@ def run(dest_dir: str) -> None:
# Remove empty rows.
tb = tb.dropna(how="all").reset_index(drop=True)

# The first row is expected to say "All other countries do not have active fur farms".
# Therefore, add all other countries and assume they do not have any active farms.
# NOTE: We confirmed this assumption with Fur Free Alliance.
error = "Data has changed. Manually check this part of the code."
assert tb.loc[0, "country"] == "All other countries do not have active fur farms", error
tb = tb.drop(0).reset_index(drop=True)

# Harmonize country names.
tb = geo.harmonize_countries(tb, countries_file=paths.country_mapping_path)

Expand All @@ -99,7 +98,13 @@ def run(dest_dir: str) -> None:
& (tb_regions["region_type"] == "country")
& (tb_regions["defined_by"] == "owid")
][["name"]]
.assign(**{"fur_farms_active": "NO", "fur_farming_status": NO_DATA_LABEL, "fur_trading_status": NO_DATA_LABEL})
.assign(
**{
"fur_farms_active": "NO",
"fur_farming_status": NO_ACTIVE_FARMS_REPORTED_LABEL,
"fur_trading_status": NO_DATA_LABEL,
}
)
.rename(columns={"name": "country"}, errors="raise")
)
tb = pr.concat([tb, tb_added], ignore_index=True)
Expand Down Expand Up @@ -182,14 +187,24 @@ def prepare_fur_farming_ban_status(tb: Table) -> Table:
# Drop unnecessary column.
tb = tb.drop(columns=["phase_out_due_to_stricter_regulations"], errors="raise")

# There are countries (for now, Argentina and Brazil) that have no ban, but it's unclear if they have active farms (the column for operating fur farms says "NO DATA").
# Ensure they appear as "NO DATA".
select_unclear = tb["fur_farms_active"] == NO_DATA_LABEL
tb.loc[select_unclear, "fur_farming_status"] = NO_DATA_LABEL

# Map all fur farming statuses.
# NOTE: The data is ambiguous. There is "NO", "YES", missing data, and missing country.
# For now, assume that missing data means "NO", and missing country means "NO DATA".
tb["fur_farming_status"] = map_series(
tb["fur_farming_status"],
mapping=FUR_FARMING_BAN_STATUS,
warn_on_missing_mappings=True,
warn_on_unused_mappings=True,
show_full_warning=True,
)

# Some countries (for now, only Cyprus) have no ban, but no active farms. Also, Israel has no data on a ban, but no active farms.
# In both cases, the status should appear as no active farms reported.
tb.loc[(tb["fur_farming_status"] == "Not banned") & (tb["fur_farms_active"] == "NO"), "fur_farming_status"] = (
NO_ACTIVE_FARMS_REPORTED_LABEL
)

# For those years that are in the future, change the status.
Expand Down
7 changes: 7 additions & 0 deletions etl/steps/data/meadow/animal_welfare/2024-12-17/fur_laws.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ def run(dest_dir: str) -> None:
#
# Process data.
#
# Remove last row, which simply gives general additional information.
# It says that any country that is not listed in the spreadsheet can be assumed to have no active fur farms.
# We confirmed this assumption with Fur Free Alliance.
error = "Spreadsheet has changed. Manually check this part of the code."
assert tb.iloc[-1]["COUNTRY"] == "No active fur farms have been reported in all other countries", error
tb = tb[:-1]

# Ensure all columns are snake-case, set an appropriate index, and sort conveniently.
# NOTE: Do not verify integrity, since there are duplicated countries (to be fixed in the garden step).
tb = tb.format(keys=["country"], verify_integrity=False)
Expand Down

0 comments on commit 0363c1f

Please sign in to comment.