Skip to content

Commit

Permalink
Merge pull request #41 from prio-data/improve_docstrings
Browse files Browse the repository at this point in the history
improve docstrings in legacy functions
  • Loading branch information
jimdale authored Nov 21, 2023
2 parents bb795fd + 9368ec2 commit 2023072
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 32 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool.poetry]
name = "views-transformation-library"

version = "2.7.1"
version = "2.7.2"
description = "A package containing data transformation functions used by the ViEWS team"
homepage = "https://www.github.com/prio-data/views_transformation_library"
readme = "README.md"
Expand Down
40 changes: 32 additions & 8 deletions views_transformation_library/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@
from sklearn.linear_model import BayesianRidge # type: ignore

def replace_na(df: pd.DataFrame, replacement=0) -> pd.DataFrame:
    """
    replace_na

    Replaces NaNs in the input dataframe with the specified value (which defaults to zero).

    Arguments:
        df: dataframe whose NaN entries are to be replaced
        replacement: quantity which will replace NaN (defaults to zero)

    Returns:
        The dataframe produced by ``df.replace``, with every NaN replaced by ``replacement``.
    """
    # No ``inplace`` flag is passed, so the result of ``replace`` is returned to the caller.
    return df.replace(np.nan, replacement)

def list_totally_missing(df: pd.DataFrame) -> List[str]:
Expand All @@ -29,7 +38,6 @@ def list_totally_missing(df: pd.DataFrame) -> List[str]:
return cols



def fill_groups_with_time_means(df: pd.DataFrame) -> pd.DataFrame:
""" Fill completely missing groups with time means """

Expand All @@ -47,7 +55,6 @@ def fill_groups_with_time_means(df: pd.DataFrame) -> pd.DataFrame:
return df



def fill_with_group_and_global_means(df: pd.DataFrame) -> pd.DataFrame:
""" Impute missing values to group-level or global means. """

Expand All @@ -62,13 +69,24 @@ def fill_with_group_and_global_means(df: pd.DataFrame) -> pd.DataFrame:
return df



def extrapolate(
df: pd.DataFrame,
limit_direction: str = "both",
limit_area: Optional[str] = None,
) -> pd.DataFrame:
""" Interpolate and extrapolate """
"""
extrapolate
Perform linear interpolation and/or extrapolation over NaNs by spatial unit
Arguments:
limit_direction: 'forward', 'backward', 'both': consecutive NaNs will be filled in this direction
limit_area: None, 'inside', 'outside': if 'inside', NaNs will only be filled if bracketed by valid values (i.e.
interpolation). If 'outside', NaNs are only filled outside valid values (i.e. extrapolation). If None, both
interpolation and extrapolation are performed.
"""

return (
df.sort_index()
.groupby(level=1)
Expand Down Expand Up @@ -115,12 +133,18 @@ def fill(
limit_direction: Literal["forward", "backward", "both"] = "both",
limit_area: Optional[Literal["inside", "outside"]] = None,
) -> pd.Series:
""" Fill column in dataframe with optional direction and area.
"""
fill
Perform forward and/or backward filling by spatial unit
Args:
s: Pandas series to apply filling to.
limit_direction: Direction in which to fill.
limit_area: Area to fill. Default None refers to the entire series.
limit_direction: 'forward', 'backward', 'both': Direction in which to fill. 'forward' propagates most recent
valid value forward. 'backward' propagates oldest valid value backwards. 'both' performs a forward propagation,
followed by a backward propagation
limit_area: None, 'inside', 'outside': if 'inside', NaNs will only be filled if bracketed by valid values.
If 'outside', NaNs are only filled outside valid values. If None, no restrictions are applied.
"""


Expand Down
Loading

0 comments on commit 2023072

Please sign in to comment.