Skip to content

Commit

Permalink
Add utils.checks.py: Check for common issues with function inputs - fixes #86
Browse files Browse the repository at this point in the history
  • Loading branch information
mdancho84 committed Oct 5, 2023
1 parent 71828e6 commit 967d0ac
Show file tree
Hide file tree
Showing 18 changed files with 234 additions and 134 deletions.
2 changes: 2 additions & 0 deletions src/pytimetk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@
from .core.fourier import *
from .core.ts_features import *
from .core.ts_summary import *
from .core.anomaly import *

from .datasets.get_datasets import *

from .utils.datetime_helpers import *
from .utils.pandas_helpers import *
from .utils.memory_helpers import *
from .utils.plot_helpers import *
from .utils.checks import *

# *** Needed for the quartodoc build: important functions ***
from .plot.plot_timeseries import (
Expand Down
12 changes: 8 additions & 4 deletions src/pytimetk/core/expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np

from typing import Union, Optional, Callable, Tuple
from pytimetk.utils.checks import check_dataframe_or_groupby, check_date_column, check_value_column

@pf.register_dataframe_method
def augment_expanding(
Expand Down Expand Up @@ -133,6 +134,12 @@ def regression(df):
regression_wide_df
```
'''
# Common checks
check_dataframe_or_groupby(data)
check_date_column(data, date_column)
check_value_column(data, value_column)

# Expanding Apply Function for Functions that Require Independent Variables
def expanding_apply(func, df, min_periods):
n_rows = len(df)
results = [np.nan] * n_rows
Expand All @@ -144,9 +151,6 @@ def expanding_apply(func, df, min_periods):

return pd.DataFrame({'result': results}, index=df.index)


if not isinstance(data, (pd.DataFrame, pd.core.groupby.generic.DataFrameGroupBy)):
raise TypeError("`data` must be a Pandas DataFrame or GroupBy object.")

if isinstance(value_column, str):
value_column = [value_column]
Expand Down Expand Up @@ -206,4 +210,4 @@ def expanding_apply(func, df, min_periods):


# Monkey patch the method to pandas groupby objects
pd.core.groupby.generic.DataFrameGroupBy.augment_expanding = augment_expanding
pd.core.groupby.generic.DataFrameGroupBy.augment_expanding = augment_expanding
17 changes: 7 additions & 10 deletions src/pytimetk/core/fourier.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import pandas as pd
import numpy as np
import pandas_flavor as pf
from typing import Union
from typing import Union, List
from pytimetk.utils.checks import check_dataframe_or_groupby, check_date_column, check_value_column

@pf.register_dataframe_method
def augment_fourier(
data: Union[pd.DataFrame, pd.core.groupby.generic.DataFrameGroupBy],
date_column: str,
value_column: str or list,
value_column: Union[str, List[str]],
num_periods: int = 1,
max_order: int = 1
) -> pd.DataFrame:
Expand Down Expand Up @@ -58,18 +59,14 @@ def augment_fourier(
"""

# Check if data is a Pandas DataFrame or GroupBy object
if not isinstance(data, pd.DataFrame):
if not isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
raise TypeError("`data` is not a Pandas DataFrame or GroupBy object.")
# Common checks
check_dataframe_or_groupby(data)
check_date_column(data, date_column)
check_value_column(data, value_column)

if isinstance(value_column, str):
value_column = [value_column]

# Ensure the date column is of datetime type
# if not pd.api.types.is_datetime64_ns_dtype(data[date_column]):
# data[date_column] = pd.to_datetime(data[date_column])

# DATAFRAME EXTENSION - If data is a Pandas DataFrame, extend with Fourier transforms
if isinstance(data, pd.DataFrame):

Expand Down
15 changes: 7 additions & 8 deletions src/pytimetk/core/holiday_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from typing import Union

from pytimetk.utils.checks import check_dataframe_or_groupby, check_date_column, check_series_or_datetime

try:
import holidays
except ImportError:
Expand Down Expand Up @@ -153,17 +155,12 @@ def augment_holiday_signature(
except ImportError:
raise ImportError("The 'holidays' package is not installed. Please install it by running 'pip install holidays'.")

# Check if data is a Pandas DataFrame or GroupBy object
if not isinstance(data, pd.DataFrame):
if not isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
raise TypeError("`data` is not a Pandas DataFrame or GroupBy object.")
# Common checks
check_dataframe_or_groupby(data)
check_date_column(data, date_column)

if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
data = data.obj

# Ensure the date column exists in the DataFrame
if date_column not in data.columns:
raise ValueError(f"'{date_column}' not found in DataFrame columns.")

# Extract start and end years directly from the Series
start_year = data[date_column].min().year
Expand Down Expand Up @@ -367,6 +364,8 @@ def get_holiday_signature(
pd.Series(dates, name='dates').get_holiday_signature('UnitedStates')
```
"""
# Common checks
check_series_or_datetime(idx)

# This function requires the holidays package to be installed
try:
Expand Down
16 changes: 9 additions & 7 deletions src/pytimetk/core/lags.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import pandas as pd
import numpy as np
import pandas_flavor as pf
from typing import Union
from typing import Union, List, Tuple

from pytimetk.utils.checks import check_dataframe_or_groupby, check_date_column, check_value_column

@pf.register_dataframe_method
def augment_lags(
data: Union[pd.DataFrame, pd.core.groupby.generic.DataFrameGroupBy],
date_column: str,
value_column: str or list,
lags: int or tuple or list = 1
value_column: Union[str, List[str]],
lags: Union[int, Tuple[int, int], List[int]] = 1
) -> pd.DataFrame:
"""
Adds lags to a Pandas DataFrame or DataFrameGroupBy object.
Expand Down Expand Up @@ -91,10 +93,10 @@ def augment_lags(
"""

# Check if data is a Pandas DataFrame or GroupBy object
if not isinstance(data, pd.DataFrame):
if not isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
raise TypeError("`data` is not a Pandas DataFrame or GroupBy object.")
# Common checks
check_dataframe_or_groupby(data)
check_date_column(data, date_column)
check_value_column(data, value_column)

if isinstance(value_column, str):
value_column = [value_column]
Expand Down
16 changes: 9 additions & 7 deletions src/pytimetk/core/leads.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import pandas as pd
import numpy as np
import pandas_flavor as pf
from typing import Union
from typing import Union, List, Tuple

from pytimetk.utils.checks import check_dataframe_or_groupby, check_date_column, check_value_column

@pf.register_dataframe_method
def augment_leads(
data: Union[pd.DataFrame, pd.core.groupby.generic.DataFrameGroupBy],
date_column: str,
value_column: str or list,
leads: int or tuple or list = 1
value_column: Union[str, List[str]],
leads: Union[int, Tuple[int, int], List[int]] = 1
) -> pd.DataFrame:
"""
Adds leads to a Pandas DataFrame or DataFrameGroupBy object.
Expand Down Expand Up @@ -84,10 +86,10 @@ def augment_leads(
"""

# Check if data is a Pandas DataFrame or GroupBy object
if not isinstance(data, pd.DataFrame):
if not isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
raise TypeError("`data` is not a Pandas DataFrame or GroupBy object.")
# Common checks
check_dataframe_or_groupby(data)
check_date_column(data, date_column)
check_value_column(data, value_column)

if isinstance(value_column, str):
value_column = [value_column]
Expand Down
16 changes: 8 additions & 8 deletions src/pytimetk/core/make_future_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

from pytimetk.core.ts_summary import get_pandas_frequency

from pytimetk.utils.checks import check_dataframe_or_groupby, check_date_column, check_value_column, check_series_or_datetime

@pf.register_series_method
def make_future_timeseries(
idx: Union[pd.Series, pd.DatetimeIndex],
Expand Down Expand Up @@ -88,14 +90,13 @@ def make_future_timeseries(
```
'''

# Check if idx is a Series or DatetimeIndex
check_series_or_datetime(idx)

# If idx is a DatetimeIndex, convert to Series
if isinstance(idx, pd.DatetimeIndex):
idx = pd.Series(idx, name="idx")

# Check if idx is a Series
if not isinstance(idx, pd.Series):
raise TypeError('idx must be a pandas Series or DatetimeIndex object')

# Create a DatetimeIndex from the provided dates
dt_index = pd.DatetimeIndex(pd.Series(idx).values)

Expand Down Expand Up @@ -236,10 +237,9 @@ def future_frame(
```
'''

# Check if data is a Pandas DataFrame or GroupBy object
if not isinstance(data, pd.DataFrame):
if not isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
raise TypeError("`data` is not a Pandas DataFrame.")
# Common checks
check_dataframe_or_groupby(data)
check_date_column(data, date_column)

# DATAFRAME EXTENSION - If data is a Pandas DataFrame, extend with future dates

Expand Down
12 changes: 7 additions & 5 deletions src/pytimetk/core/pad.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@


import pandas as pd
import pandas_flavor as pf
from typing import Union

from pytimetk.utils.checks import check_dataframe_or_groupby, check_date_column, check_value_column

@pf.register_dataframe_method
def pad_by_time(
data: Union[pd.DataFrame, pd.core.groupby.generic.DataFrameGroupBy],
Expand Down Expand Up @@ -103,15 +103,17 @@ def pad_by_time(
)
padded_df
'''
if not isinstance(data, (pd.DataFrame, pd.core.groupby.generic.DataFrameGroupBy)):
raise TypeError("`data` must be a Pandas DataFrame or DataFrameGroupBy object.")
# Common checks
check_dataframe_or_groupby(data)
check_date_column(data, date_column)

# Prep Inputs
if start_date is not None:
start_date = pd.Timestamp(start_date)
if end_date is not None:
end_date = pd.Timestamp(end_date)

# Check if start_date is greater than end_date
if start_date and end_date:
if start_date > end_date:
raise ValueError("Start date cannot be greater than end date.")
Expand Down
79 changes: 35 additions & 44 deletions src/pytimetk/core/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from typing import Union, Optional, Callable, Tuple, List

from pytimetk.utils.checks import check_dataframe_or_groupby, check_date_column, check_value_column

@pf.register_dataframe_method
def augment_rolling(
data: Union[pd.DataFrame, pd.core.groupby.generic.DataFrameGroupBy],
Expand Down Expand Up @@ -168,7 +170,39 @@ def regression(df):
```
'''

# Common checks
check_dataframe_or_groupby(data)
check_date_column(data, date_column)
check_value_column(data, value_column)

# Specific checks
if isinstance(value_column, str):
value_column = [value_column]

if not isinstance(window, (int, tuple, list)):
raise TypeError("`window` must be an integer, tuple, or list.")

if isinstance(window, int):
window = [window]
elif isinstance(window, tuple):
window = list(range(window[0], window[1] + 1))

if isinstance(window_func, (str, tuple)):
window_func = [window_func]

# Copy Data
data_copy = data.copy() if isinstance(data, pd.DataFrame) else data.obj.copy()

# Format Data
if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
group_names = data.grouper.names
grouped = data_copy.sort_values(by=[*group_names, date_column]).groupby(group_names)
else:
group_names = None
grouped = [([], data_copy.sort_values(by=[date_column]))]


# Rolling Apply Function when independent variables are required
def rolling_apply_2(func, df, window_size, min_periods, center):
results = [np.nan] * len(df)
adjusted_window = window_size // 2 if center else window_size - 1 # determine the offset for centering
Expand All @@ -191,52 +225,9 @@ def rolling_apply_2(func, df, window_size, min_periods, center):
results[center_point if center else end - 1] = func(window_df)

return pd.DataFrame(results, columns=['result'], index=df.index)



# def rolling_apply(func, series, *args):
# result = series.rolling(window=window_size, center=center, **kwargs).apply(lambda x: func(x, *args), raw=False)
# return result

# def rolling_apply_2(func, df):

# results = []
# for start in range(len(df) - window_size + 1):
# window_df = df.iloc[start:start + window_size]
# result = func(window_df)
# results.append(result)

# ret = pd.DataFrame(results, index=df.index[window_size - 1:])

# return ret


if not isinstance(data, (pd.DataFrame, pd.core.groupby.generic.DataFrameGroupBy)):
raise TypeError("`data` must be a Pandas DataFrame or GroupBy object.")

if isinstance(value_column, str):
value_column = [value_column]

if not isinstance(window, (int, tuple, list)):
raise TypeError("`window` must be an integer, tuple, or list.")

if isinstance(window, int):
window = [window]
elif isinstance(window, tuple):
window = list(range(window[0], window[1] + 1))

if isinstance(window_func, (str, tuple)):
window_func = [window_func]

data_copy = data.copy() if isinstance(data, pd.DataFrame) else data.obj.copy()

if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy):
group_names = data.grouper.names
grouped = data_copy.sort_values(by=[*group_names, date_column]).groupby(group_names)
else:
group_names = None
grouped = [([], data_copy.sort_values(by=[date_column]))]

# Apply Rolling Functions
result_dfs = []
for _, group_df in grouped:
for value_col in value_column:
Expand Down
Loading

0 comments on commit 967d0ac

Please sign in to comment.