Skip to content

Commit

Permalink
💄 modify tabulate code
Browse files Browse the repository at this point in the history
  • Loading branch information
paarriagadap committed Nov 22, 2024
1 parent e6eda7b commit 4b867f0
Showing 1 changed file with 20 additions and 12 deletions.
32 changes: 20 additions & 12 deletions etl/steps/data/garden/wid/2024-11-19/world_inequality_database.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""
Load World Inequality Database meadow dataset and create a garden dataset.
NOTE: To extract the log of the process (to review sanity checks, for example), run the following command in the terminal:
nohup uv run etl run world_inequality_database > output.log 2>&1 &
NOTE: To extract the log of the process (to review sanity checks, for example), follow these steps:
1. Define LONG_FORMAT as True.
2. Run the following command in the terminal:
nohup uv run etl run world_inequality_database > output.log 2>&1 &
"""

Expand All @@ -25,6 +27,9 @@
# Set table format when printing
TABLEFMT = "pretty"

# Whether to print the full table (True) or only the first 5 rows (False) in the sanity-check log messages
LONG_FORMAT = False


def run(dest_dir: str) -> None:
#
Expand Down Expand Up @@ -208,16 +213,16 @@ def check_between_0_and_1(tb: Table, variables: list, welfare: list):
# Filter only values lower than 0 or higher than 1
tb_error = tb[mask].copy().reset_index()
paths.log.fatal(
f"""Values for {col} are not between 0 and 1:
{_tabulate(tb_error[['country', 'year', col]])}"""
f"""{len(tb_error)} values for {col} are not between 0 and 1:
{_tabulate(tb_error[['country', 'year', col]], long_format=LONG_FORMAT)}"""
)

elif any_error and w == "wealth":
# Filter only values lower than 0 or higher than 1
tb_error = tb[mask].copy().reset_index()
paths.log.warning(
f"""Values for {col} are not between 0 and 1:
{_tabulate(tb_error[['country', 'year', col]])}"""
f"""{len(tb_error)} values for {col} are not between 0 and 1:
{_tabulate(tb_error[['country', 'year', col]], long_format=LONG_FORMAT)}"""
)

return tb
Expand Down Expand Up @@ -247,7 +252,7 @@ def check_shares_sum_100(tb: Table, welfare: list, margin: float):
tb_error = tb[mask].reset_index(drop=True).copy()
paths.log.fatal(
f"""{len(tb_error)} share observations ({w}{EXTRAPOLATED_DICT[e]}) are not adding up to 100%:
{_tabulate(tb_error[['country', 'year', 'sum_check']].sort_values(by='sum_check', ascending=False).reset_index(drop=True), floatfmt=".1f")}"""
{_tabulate(tb_error[['country', 'year', 'sum_check']].sort_values(by='sum_check', ascending=False).reset_index(drop=True), floatfmt=".1f", long_format=LONG_FORMAT)}"""
)

return tb
Expand All @@ -274,7 +279,7 @@ def check_negative_values(tb: Table):
tb_error = tb[mask].reset_index(drop=True).copy()
paths.log.warning(
f"""{len(tb_error)} observations for {v} are negative:
{_tabulate(tb_error[['country', 'year', v]])}"""
{_tabulate(tb_error[['country', 'year', v]], long_format=LONG_FORMAT)}"""
)

return tb
Expand Down Expand Up @@ -316,7 +321,7 @@ def check_monotonicity(tb: Table, metric: list, welfare: list):
tb_error = tb[mask].reset_index(drop=True).copy()
paths.log.fatal(
f"""{len(tb_error)} observations for {m}_{w}{EXTRAPOLATED_DICT[e]} are not monotonically increasing:
{_tabulate(tb_error[['country', 'year'] + cols], floatfmt=".2f")}"""
{_tabulate(tb_error[['country', 'year'] + cols], floatfmt=".2f", long_format=LONG_FORMAT)}"""
)

return tb
Expand Down Expand Up @@ -366,11 +371,14 @@ def check_avg_between_thr(tb: Table, welfare: list) -> Table:
tb_error = tb[mask].reset_index(drop=True).copy()
paths.log.fatal(
f"""{len(tb_error)} observations for avg {w}{EXTRAPOLATED_DICT[e]} are not between the corresponding thresholds:
{_tabulate(tb_error[['country', 'year'] + check_cols])}"""
{_tabulate(tb_error[['country', 'year'] + check_cols], long_format=LONG_FORMAT)}"""
)

return tb


def _tabulate(tb: Table, headers="keys", tablefmt=TABLEFMT, **kwargs):
return tabulate(tb.head(5), headers=headers, tablefmt=tablefmt, **kwargs)
def _tabulate(tb: Table, long_format: bool = False, headers="keys", tablefmt=TABLEFMT, **kwargs):
    """
    Render a table as text for the sanity-check log messages.

    If long_format is True, the whole table is rendered; otherwise only the first 5 rows are.
    long_format defaults to False so that calls written against the previous signature
    (without long_format) keep working and keep the truncated output.
    Any extra keyword arguments (e.g. floatfmt) are passed through to tabulate.
    """
    # Truncate the table unless the full output was explicitly requested
    tb_to_print = tb if long_format else tb.head(5)
    return tabulate(tb_to_print, headers=headers, tablefmt=tablefmt, **kwargs)

0 comments on commit 4b867f0

Please sign in to comment.