[docs]defdate_offset(n:int,freq:str)->pd.DateOffset:
-"""Generate a pandas DateOffset based on the given frequency and value.
+
[docs]defget_item_names_filter(items_list:List[str]|str)->str:
+"""
+ Generate a `"WHERE"` clause component to filter values from column `"item_name"`. Parameters ----------
- n : int
- The number of time units for the offset.
- freq : str {'days', 'weeks', 'months', 'years'}
- The frequency type. Valid options are 'days', 'weeks', 'months', 'years'.
-
- Returns
- -------
- pd.DateOffset
- A DateOffset object for the specified frequency and value.
-
- Raises
- ------
- ValueError
- If `freq` is not one of the valid options.
- """
- iffreq=="days":
- returnpd.DateOffset(days=n)
- iffreq=="weeks":
- returnpd.DateOffset(weeks=n)
- iffreq=="months":
- returnpd.DateOffset(months=n)
- iffreq=="years":
- returnpd.DateOffset(years=n)
- raiseValueError(
- f"The specified `freq` {freq} is not a valid frequency. "
- "Valid frequencies are: 'days', 'weeks', 'months', 'years'."
- )
-
-
-
[docs]defget_item_names_filter(items_list:List[str]|str)->str:
-"""
- Generate a "WHERE" clause component to filter values from column `"item_name"`.
-
items_list : List[str] | str
- Item name or names to add to the "WHERE" clause component.
+ Item name or names to add to the `"WHERE"` clause component. Returns ------- str
- The "WHERE" clause component that can be used to filter values from column `"item_name"`.
+ The `"WHERE"` clause component that can be used to filter values from column `"item_name"`. Examples --------
@@ -369,17 +338,19 @@
Source code for iowa_forecast.load_data
return"("+" OR ".join(f'item_name = "{item_name}"'foritem_nameinitems_list)+")"
[docs]defget_min_datapoints_filter(min_size:int)->str:"""
- Generate a "WHERE" clause to filter items that have at least `min_size` observations.
+ Generate a `"WHERE"` clause to filter items that have at least `min_size` observations.
+ Parameters
+ ---------- min_size : int
- Minimum number of observations to use as value for the "WHERE" clause.
+ Minimum number of observations to use as value for the `"WHERE"` clause. Returns ------- str
- The "WHERE" clause component.
+ The `"WHERE"` clause component. """returnf""" WHERE
@@ -388,7 +359,7 @@
[docs]defget_year_weather_query(year:int,state:str="IA")->str:""" Generate an SQL query to retrieve weather data for a specific year and state.
@@ -650,7 +621,7 @@
[docs]defget_weather_query(start_date:str,end_date:str,state:str="IA")->str:""" Generate an SQL query to retrieve weather data for a given date range.
@@ -673,7 +644,7 @@
[docs]defget_data(client:bigquery.Client,query:str)->pd.DataFrame:""" Execute a BigQuery SQL query and return the result as a DataFrame.
@@ -397,7 +398,7 @@
[docs]defcreate_model_query(# pylint: disable=too-many-argumentsitem_name:str,timestamp_col:str="date",time_series_data_col:str="total_amount_sold",model_name:str="bqmlforecast.arima_plus_xreg_model",train_table_name:str="bqmlforecast.training_data",test_table_name:str="bqmlforecast.test_data",
- holiday_region:str="US",
- auto_arima:bool=True,
- adjust_step_changes:bool=True,
- clean_spikes_and_dips:bool=True,
+ **kwargs,)->str:""" Generate a BigQuery 'CREATE MODEL' query for a specified item.
@@ -323,33 +322,39 @@
Source code for iowa_forecast.ml_train
The base name for the model. train_table_name : str, default="bqmlforecast.training_data" The name of the table containing training data.
- test_table_name : str, default="bqmlforecast.test_data"
+ test_table_name : str | None, default="bqmlforecast.test_data" The name of the table containing test data.
- holiday_region : str, default="US"
- The holiday region to be used by the model.
- auto_arima : bool, default=True
- Whether to enable AUTO_ARIMA.
- adjust_step_changes : bool, default=True
- Whether to adjust for step changes in the data.
- clean_spikes_and_dips : bool, default=True
- Whether to clean spikes and dips in the data.
+ **kwargs : Any
+ Additional keyword arguments such as:
+
+ holiday_region : str, default="US"
+ The holiday region to be used by the model.
+ auto_arima : bool, default=True
+ Whether to enable AUTO_ARIMA.
+ adjust_step_changes : bool, default=True
+ Whether to adjust for step changes in the data.
+ clean_spikes_and_dips : bool, default=True
+ Whether to clean spikes and dips in the data. Returns ------- str A SQL query string for creating the specified model. """
+ configs=ARIMA_PLUS_XREG_Config(**kwargs)item_name_norm=normalize_item_name(item_name)
+ test_table_query=include_test_on_model_train(item_name,timestamp_col,
+ train_table_name,test_table_name)returnf""" CREATE OR REPLACE MODEL `{model_name}_{item_name_norm}` OPTIONS( MODEL_TYPE='ARIMA_PLUS_XREG', TIME_SERIES_TIMESTAMP_COL='{timestamp_col}', TIME_SERIES_DATA_COL='{time_series_data_col}',
- HOLIDAY_REGION='{holiday_region}',
- AUTO_ARIMA={auto_arima},
- ADJUST_STEP_CHANGES={adjust_step_changes},
- CLEAN_SPIKES_AND_DIPS={clean_spikes_and_dips}
+ HOLIDAY_REGION='{configs.holiday_region}',
+ AUTO_ARIMA={configs.auto_arima},
+ ADJUST_STEP_CHANGES={configs.adjust_step_changes},
+ CLEAN_SPIKES_AND_DIPS={configs.clean_spikes_and_dips} ) AS SELECT *
@@ -357,6 +362,44 @@
Source code for iowa_forecast.ml_train
`{train_table_name}` WHERE item_name = "{item_name}"
+{test_table_query}
+ ORDER BY
+ date
+ """
+
+
+
[docs]definclude_test_on_model_train(
+ item_name:str,
+ timestamp_col:str,
+ train_table_name:str,
+ test_table_name:str|None=None,
+)->str:
+"""
+ Include test data in the model training process.
+
+ This function generates an SQL query component to union test data with
+ training data if a test table is specified.
+
+ Parameters
+ ----------
+ item_name : str
+ The name of the item being modeled.
+ timestamp_col : str
+ The column name representing the timestamp in the dataset.
+ train_table_name : str
+ The name of the table containing training data.
+ test_table_name : str or None, optional
+ The name of the table containing test data. If None, no test data
+ is included.
+
+ Returns
+ -------
+ str
+ An SQL query string component to include test data.
+ """
+ ifnotisinstance(test_table_name,str):
+ return""
+ returnf""" UNION ALL ( SELECT
@@ -383,12 +426,75 @@
Source code for iowa_forecast.ml_train
AND t2.item_name = "{item_name}" ) )
- ORDER BY
- date """
[docs]definclude_test_on_arima_model_train(
+ column:str,
+ time_series_timestamp_col:str,
+ time_series_id_col:str,
+ train_table_name:str,
+ test_table_name:str|None=None,
+)->str:
+"""
+ Include test data in the uni-variate ARIMA model training process.
+
+ This function generates an SQL query component to union test data with
+ training data if a test table is specified.
+
+ Parameters
+ ----------
+ column : str
+ The name of the feature being modeled.
+ time_series_timestamp_col : str
+ The column name representing the timestamp in the dataset.
+ time_series_id_col : str
+ The column name representing the identifier.
+ train_table_name : str
+ The name of the table containing training data.
+ test_table_name : str or None, optional
+ The name of the table containing test data. If None, no test data
+ is included.
+
+ Returns
+ -------
+ str
+ An SQL query string component to include test data.
+ """
+ ifnotisinstance(test_table_name,str):
+ return""
+ returnf"""
+ UNION ALL
+ (
+ SELECT
+ *
+ FROM (
+ SELECT
+ t2.{time_series_timestamp_col},
+ t2.{column},
+ t2.{time_series_id_col}
+ FROM
+ `{test_table_name}` AS t2
+ JOIN
+ (
+ SELECT
+{time_series_id_col},
+ MAX({time_series_timestamp_col}) AS max_date
+ FROM
+ `{train_table_name}`
+ GROUP BY
+{time_series_id_col}
+ ) AS md
+ ON
+ t2.{time_series_id_col} = md.{time_series_id_col}
+ WHERE
+ t2.{time_series_timestamp_col} > md.max_date
+ )
+ )
+ """
time_series_data_col:str="total_amount_sold",model_name:str="bqmlforecast.arima_plus_xreg_model",train_table_name:str="bqmlforecast.training_data",
- test_table_name:str="bqmlforecast.test_data",
- holiday_region:str="US",
- auto_arima:bool=True,
- adjust_step_changes:bool=True,
- clean_spikes_and_dips:bool=True,
+ test_table_name:str|None="bqmlforecast.test_data",
+ **kwargs,)->None:"""
- Create ARIMA_PLUS_XREG models for a list of items.
+ Create `'ARIMA_PLUS_XREG'` models for a list of items.
- This function generates and executes a CREATE MODEL query
+ This function generates and executes a `'CREATE MODEL'` query for each item in the provided list. The models are created using the specified training and test tables in BigQuery.
@@ -480,16 +583,21 @@
Source code for iowa_forecast.ml_train
The base name for the models. train_table_name : str, default="bqmlforecast.training_data" The name of the table containing training data.
- test_table_name : str, default="bqmlforecast.test_data"
+ test_table_name : str | None, default="bqmlforecast.test_data" The name of the table containing test data.
- holiday_region : str, default="US"
- The holiday region to be used by the models.
- auto_arima : bool, default=True
- Whether to enable AUTO_ARIMA.
- adjust_step_changes : bool, default=True
- Whether to adjust for step changes in the data.
- clean_spikes_and_dips : bool, default=True
- Whether to clean spikes and dips in the data.
+ If `None`, then only the data from `train_table_name` is used for
+ training the model. See the 'Notes' section for more information.
+ **kwargs : Any
+ Additional keyword arguments such as:
+
+ holiday_region : str, default="US"
+ The holiday region to be used by the models.
+ auto_arima : bool, default=True
+ Whether to enable `'AUTO_ARIMA'`.
+ adjust_step_changes : bool, default=True
+ Whether to adjust for step changes in the data.
+ clean_spikes_and_dips : bool, default=True
+ Whether to clean spikes and dips in the data. Notes -----
@@ -502,6 +610,13 @@
Source code for iowa_forecast.ml_train
If using a Google Cloud account with billing enabled, running this code might incur charges.
+
+ If you are evaluating the model, you shouldn't use all available data
+ to train the model. Therefore, if you're evaluating the model, consider
+ setting the parameter `test_table_name` to `None`. Doing so will cause
+ the model to be trained using only the specified data from the
+ `train_table_name` which in turn will allow you to use the data from
+ `test_table_name` for evaluation. """_items_list=(items_listifnotisinstance(max_items,int)elseitems_list[:max_items]
@@ -514,30 +629,29 @@
[docs]deftrain_arima_models(# pylint: disable=too-many-locals, too-many-argumentsclient:bigquery.Client,columns:List[str],model:str="bqmlforecast.arima_model",train_table_name:str="bqmlforecast.training_data",
- test_table_name:str="bqmlforecast.test_data",
+ test_table_name:str|None="bqmlforecast.test_data",model_metrics_table_name:str|None="bqmlforecast.arima_model_metrics",time_series_timestamp_col:str="date",time_series_id_col:str="item_name",
- confidence_level=0.9,
- horizon=7,
+ confidence_level:float=0.9,
+ horizon:int=7,
+ use_test_data_on_train:bool=True,
+ **kwargs,):""" Train ARIMA models for a list of columns and store their metrics.
- This function generates and executes 'CREATE MODEL' queries for ARIMA
+ This function generates and executes `'CREATE MODEL'` queries for ARIMA models using the specified columns, and evaluates their performance by creating tables of model metrics.
@@ -554,7 +668,7 @@
Source code for iowa_forecast.ml_train
The base name for the ARIMA models. train_table_name : str, default="bqmlforecast.training_data" The name of the table containing training data.
- test_table_name : str, default="bqmlforecast.test_data"
+ test_table_name : str | None, default="bqmlforecast.test_data" The name of the table containing test data. model_metrics_table_name : str or None, default="bqmlforecast.arima_model_metrics" The base name for the tables where model metrics will be stored.
@@ -566,25 +680,37 @@
Source code for iowa_forecast.ml_train
The confidence level used in the model evaluation. horizon : int, default=7 The number of time steps (days) to forecast.
-
+ use_test_data_on_train : bool, default=True
+ Whether to use test data during model training. """
+ config=ARIMAConfig(**kwargs)
+
forcolumnintrack(columns,description="Creating ARIMA models..."):model_name=f"{model}_{column}"
+ test_data_query=""
+ ifuse_test_data_on_train:
+ test_data_query=include_test_on_arima_model_train(
+ column,
+ time_series_timestamp_col,
+ time_series_id_col,
+ train_table_name,
+ test_table_name,
+ )train_arima_query=f""" CREATE OR REPLACE MODEL `{model_name}` OPTIONS(
- MODEL_TYPE = 'ARIMA_PLUS',
- AUTO_ARIMA = TRUE,
+ MODEL_TYPE = '{config.model_type}',
+ AUTO_ARIMA = {config.auto_arima}, HORIZON = {horizon}, TIME_SERIES_TIMESTAMP_COL = '{time_series_timestamp_col}', TIME_SERIES_DATA_COL = '{column}', TIME_SERIES_ID_COL = '{time_series_id_col}',
- FORECAST_LIMIT_LOWER_BOUND = 0,
- DECOMPOSE_TIME_SERIES = TRUE,
- HOLIDAY_REGION = 'US',
- DATA_FREQUENCY = 'AUTO_FREQUENCY',
- ADJUST_STEP_CHANGES = TRUE,
- CLEAN_SPIKES_AND_DIPS = TRUE
+ FORECAST_LIMIT_LOWER_BOUND = {config.forecast_limit_lower_bound},
+ DECOMPOSE_TIME_SERIES = {config.decompose_time_series},
+ HOLIDAY_REGION = '{config.holiday_region}',
+ DATA_FREQUENCY = '{config.data_frequency}',
+ ADJUST_STEP_CHANGES = {config.adjust_step_changes},
+ CLEAN_SPIKES_AND_DIPS = {config.clean_spikes_and_dips} ) AS SELECT{time_series_timestamp_col},
@@ -592,33 +718,7 @@
Source code for iowa_forecast.ml_train
{time_series_id_col} FROM `{train_table_name}`
- UNION ALL
- (
- SELECT
- *
- FROM (
- SELECT
- t2.{time_series_timestamp_col},
- t2.{column},
- t2.{time_series_id_col}
- FROM
- `{test_table_name}` AS t2
- JOIN
- (
- SELECT
-{time_series_id_col},
- MAX({time_series_timestamp_col}) AS max_date
- FROM
- `{train_table_name}`
- GROUP BY
-{time_series_id_col}
- ) AS md
- ON
- t2.{time_series_id_col} = md.{time_series_id_col}
- WHERE
- t2.{time_series_timestamp_col} > md.max_date
- )
- )
+{test_data_query} """train_arima_job=client.query(train_arima_query)train_arima_job.result()
diff --git a/_modules/iowa_forecast/models_configs.html b/_modules/iowa_forecast/models_configs.html
new file mode 100644
index 0000000..8681a4f
--- /dev/null
+++ b/_modules/iowa_forecast/models_configs.html
@@ -0,0 +1,457 @@
+
+
+
+
+
+
+
+ iowa_forecast.models_configs - Iowa Liquor Sales Forecast 0.0.1 documentation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Skip to content
+
+
+
+
+"""
+This module provides classes for managing and validating configuration parameters
+for various machine learning models, specifically focusing on ARIMA-based models.
+The module includes a base class that standardizes the process of handling model
+configuration, ensuring that all derived classes follow a consistent pattern for
+validating and setting parameters.
+
+Classes
+-------
+AbstractBaseModelConfig : ABC
+ An abstract base class for the `BaseModelConfig` class.
+
+BaseModelConfig : AbstractBaseModelConfig
+ A base class that provides common functionality for model
+ configuration, including parameter validation, default value handling, and
+ error checking. Subclasses are required to define a `SUPPORTED_PARAMETERS`
+ dictionary that specifies the expected parameter types, default values,
+ and any valid choices.
+
+ARIMAConfig : BaseModelConfig
+ A configuration class for ARIMA model parameters. Inherits from `BaseModelConfig`
+ and defines specific parameters used by ARIMA and ARIMA_PLUS models. This class
+ ensures that the parameters adhere to the expected types and valid choices.
+
+ARIMA_PLUS_XREG_Config : BaseModelConfig
+ A configuration class for ARIMA_PLUS_XREG model parameters. This class extends
+ `BaseModelConfig` and defines the settings specific to the `ARIMA_PLUS_XREG`
+ model, such as `non_negative_forecast`.
+
+Usage
+-----
+These configuration classes are intended to be used in the setup and validation of
+model parameters before they are passed to machine learning model training functions.
+By leveraging these classes, developers can ensure that all configuration parameters
+are correctly typed, are supported by the model, and adhere to expected choices, reducing
+the likelihood of runtime errors.
+
+Example
+-------
+>>> config = ARIMAConfig(model_type="ARIMA")
+>>> print(config.model_type)
+ARIMA
+
+>>> xreg_config = ARIMA_PLUS_XREG_Config(
+... model_type="ARIMA_PLUS_XREG",
+... holiday_region="US",
+... non_negative_forecast=True
+... )
+>>> print(xreg_config.non_negative_forecast)
+True
+"""
+fromabcimportABC,abstractmethod
+fromtypingimportAny,Dict,Tuple,List
+
+
+
[docs]classAbstractBaseModelConfig(ABC):# pylint: disable=too-few-public-methods
+"""Abstract base class for `BaseModelConfig` configuration class."""
+
+ @property
+ @abstractmethod
+ defSUPPORTED_PARAMETERS(self)->Dict[# pylint: disable=invalid-name
+ str,Tuple[Any,Any,List[Any]]
+ ]:
+"""
+ This abstract property must be implemented by subclasses.
+ It should return a dictionary where the keys are parameter names,
+ and the values are tuples containing the expected type, default value,
+ and a list of valid choices (if any).
+ """
+
+
+
[docs]classBaseModelConfig(AbstractBaseModelConfig):
+"""
+ Base class for model configuration parameters.
+
+ This class provides common functionality for handling configuration parameters
+ passed via kwargs, including unpacking, validation, and setting default values.
+
+ Subclasses must define the `SUPPORTED_PARAMETERS` dictionary, which specifies
+ the expected parameter types, default values, and any restricted choices.
+ """
+
+ @property
+ defSUPPORTED_PARAMETERS(self)->Dict[str,Tuple[Any,Any,List[Any]]]:
+ return{}
+
+ def__init__(self,**kwargs):
+ self._params={}
+ self._validate_and_set_parameters(kwargs)
+
+ def_validate_and_set_parameters(self,kwargs:Dict[str,Any]):
+ forkey,(expected_type,default_value,choices)inself.SUPPORTED_PARAMETERS.items():
+ ifkeyinkwargs:
+ value=kwargs[key]
+ ifnotisinstance(value,expected_type):
+ raiseValueError(
+ f"Invalid value for parameter '{key}': expected {expected_type.__name__}, "
+ f"but got {type(value).__name__}."
+ )
+ ifchoicesandvaluenotinchoices:
+ raiseValueError(
+ f"Invalid value for parameter '{key}': got '{value}', "
+ f"but expected one of {choices}."
+ )
+ self._params[key]=value
+ else:
+ self._params[key]=default_value
+
+ # Identify unsupported parameters
+ unsupported_params=set(kwargs)-set(self.SUPPORTED_PARAMETERS)
+ ifunsupported_params:
+ raiseValueError(
+ f"Unsupported parameters provided: {', '.join(unsupported_params)}. "
+ "Please check your input."
+ )
+
+ def__getattr__(self,name:str)->Any:
+ ifnameinself._params:
+ returnself._params[name]
+ raiseAttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")
+
+
+
[docs]classARIMAConfig(BaseModelConfig):# pylint: disable=too-few-public-methods
+"""
+ Configuration class for `'ARIMA'` model parameters.
+
+ Inherits common functionality from `BaseModelConfig` and defines specific
+ parameters for `'ARIMA'` models, including validation of choices for some
+ parameters.
+ """
+
+ @property
+ defSUPPORTED_PARAMETERS(self)->Dict[str,Tuple[Any,Any,List[Any]]]:
+ return{
+ "model_type":(str,"ARIMA_PLUS",["ARIMA_PLUS","ARIMA"]),
+ "auto_arima":(bool,True,[]),
+ "forecast_limit_lower_bound":(int,0,[]),
+ "clean_spikes_and_dips":(bool,True,[]),
+ "decompose_time_series":(bool,True,[]),
+ "holiday_region":(str,"US",[]),
+ "data_frequency":(str,"AUTO_FREQUENCY",
+ ["AUTO_FREQUENCY","DAILY","WEEKLY","MONTHLY"]),
+ "adjust_step_changes":(bool,True,[]),
+ }
+
+
+
[docs]classARIMA_PLUS_XREG_Config(BaseModelConfig):# pylint: disable=invalid-name, too-few-public-methods
+"""
+ Configuration class for `'ARIMA_PLUS_XREG'` model parameters.
+
+ Inherits common functionality from `BaseModelConfig` and defines specific
+ parameters for `'ARIMA_PLUS_XREG'` models, including validation of choices for
+ some parameters.
+ """
+
+ @property
+ defSUPPORTED_PARAMETERS(self)->Dict[str,Tuple[Any,Any,List[Any]]]:
+ return{
+ "model_type":(str,"ARIMA_PLUS_XREG",["ARIMA_PLUS_XREG"]),
+ "auto_arima":(bool,True,[]),
+ "clean_spikes_and_dips":(bool,True,[]),
+ "holiday_region":(str,"US",[]),
+ "data_frequency":(str,"AUTO_FREQUENCY",
+ ["AUTO_FREQUENCY","DAILY","WEEKLY","MONTHLY"]),
+ "adjust_step_changes":(bool,True,[]),
+ "non_negative_forecast":(bool,False,[]),
+ }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/_modules/iowa_forecast/plots.html b/_modules/iowa_forecast/plots.html
index 878103b..ff5789d 100644
--- a/_modules/iowa_forecast/plots.html
+++ b/_modules/iowa_forecast/plots.html
@@ -208,6 +208,7 @@
[docs]defconvert_to_datetime(dataframe:pd.DataFrame,col:str)->pd.DataFrame:""" Convert a specified column in a DataFrame to datetime format.
@@ -325,7 +326,7 @@
[docs]defparse_combined_string(combined:str)->dict:
+"""Parse a combined offset string into its components.
+
+ Parameters
+ ----------
+ combined : str
+ A combined string specifying the offset, e.g., `'2Y3M2W1D'`.
+
+ Returns
+ -------
+ dict
+ A dictionary with keys `'years'`, `'months'`, `'weeks'`, `'days'`
+ and their corresponding values.
+
+ Raises
+ ------
+ ValueError
+ If the combined string is invalid.
+ """
+ pattern=re.compile(
+ r'(?P<years>\d+Y)?(?P<months>\d+M)?(?P<weeks>\d+W)?(?P<days>\d+D)?',
+ re.IGNORECASE
+ )
+ match=pattern.fullmatch(combined)
+ ifnotmatch:
+ raiseValueError(f"The specified `combined` string {combined} is not valid.")
+
+ return{k:int(v[:-1])ifvelse0fork,vinmatch.groupdict().items()}
+
+
+
[docs]defcreate_date_offset_from_parts(years=0,months=0,weeks=0,days=0)->pd.DateOffset:
+"""Create a `pandas.DateOffset` object from individual time components.
+
+ Parameters
+ ----------
+ years : int, default=0
+ Number of years for the offset.
+ months : int, default=0
+ Number of months for the offset.
+ weeks : int, default=0
+ Number of weeks for the offset.
+ days : int, default=0
+ Number of days for the offset.
+
+ Returns
+ -------
+ pd.DateOffset
+ A `pandas.DateOffset` object for the specified time components.
+ """
+ returnpd.DateOffset(years=years,months=months,weeks=weeks,days=days)
+
+
+
[docs]defdate_offset(*args:Union[int,str],freq:str=None)->pd.DateOffset:
+"""
+ Generate a `pandas.DateOffset` based on the given frequency and value or a combined string.
+
+ Parameters
+ ----------
+ args : int or str
+ * If one argument is provided, it should be a combined string specifying
+ the offset, e.g., `'2Y3M2W1D'`.
+ * If two arguments are provided, they should be `n` (int) and `freq` (str).
+ freq : str {'days', 'weeks', 'months', 'years'}, optional
+ The frequency type. Valid options are `'days'`, `'weeks'`, `'months'`, `'years'`.
+ Ignored if a single combined offset string is provided.
+
+ Returns
+ -------
+ pd.DateOffset
+ A `pandas.DateOffset` object for the specified frequency and value.
+
+ Raises
+ ------
+ ValueError
+ If `freq` is not one of the valid options or if the combined string is invalid.
+ """
+ iflen(args)==1andisinstance(args[0],str):
+ combined=args[0]
+ offset_parts=parse_combined_string(combined)
+ returncreate_date_offset_from_parts(**offset_parts)
+
+ iflen(args)==2andisinstance(args[0],int)andisinstance(args[1],str):
+ n,freq=args
+ freq=freq.lower()
+ valid_freqs={"d":"days","day":"days","days":"days",
+ "w":"weeks","week":"weeks","weeks":"weeks",
+ "m":"months","month":"months","months":"months",
+ "y":"years","year":"years","years":"years"}
+
+ iffreqnotinvalid_freqs:
+ raiseValueError(f"The specified `freq` {freq} is not a valid frequency. "
+ "Valid frequencies are: 'days', 'weeks', 'months', 'years'.")
+
+ returncreate_date_offset_from_parts(**{valid_freqs[freq]:n})
+
+ raiseValueError(
+ "Either provide a single combined string or both `n` and `freq` as arguments.")
diff --git a/_sources/api_reference/index.rst.txt b/_sources/api_reference/index.rst.txt
index 6cca042..ee4296a 100644
--- a/_sources/api_reference/index.rst.txt
+++ b/_sources/api_reference/index.rst.txt
@@ -1,5 +1,5 @@
API Reference
-==============
+=============
This page gives an overview of all public iowa_forecast objects, functions and methods.
All functions exposed in `iowa_forecast.*` namespace are public.
@@ -12,14 +12,17 @@ The `iowa_forecast` package contains the following modules:
* `iowa_forecast.ml_train`: BigQuery Model Training and Execution Module.
+* `iowa_forecast.models_configs`: Classes for managing and validating
+ configuration parameters.
+
* `iowa_forecast.plots`: Time Series Plotting and Date Handling Module.
* `iowa_forecast.utils`: General utility functions Module.
iowa\_forecast
--------------
+--------------
.. toctree::
:maxdepth: 2
- iowa_forecast/index
\ No newline at end of file
+ iowa_forecast/index
diff --git a/_sources/api_reference/iowa_forecast/index.rst.txt b/_sources/api_reference/iowa_forecast/index.rst.txt
index f2f95cc..ac3eae2 100644
--- a/_sources/api_reference/iowa_forecast/index.rst.txt
+++ b/_sources/api_reference/iowa_forecast/index.rst.txt
@@ -8,5 +8,6 @@ iowa\_forecast
load_data
ml_eval
ml_train
+ models_configs
plots
utils
diff --git a/_sources/api_reference/iowa_forecast/models_configs.rst.txt b/_sources/api_reference/iowa_forecast/models_configs.rst.txt
new file mode 100644
index 0000000..872049d
--- /dev/null
+++ b/_sources/api_reference/iowa_forecast/models_configs.rst.txt
@@ -0,0 +1,7 @@
+models\_configs
+---------------
+
+.. automodule:: iowa_forecast.models_configs
+ :members:
+ :undoc-members:
+ :show-inheritance:
\ No newline at end of file
diff --git a/api_reference/index.html b/api_reference/index.html
index aba4b8a..cc00c55 100644
--- a/api_reference/index.html
+++ b/api_reference/index.html
@@ -209,6 +209,7 @@
Generate a BigQuery ‘CREATE MODEL’ query for a specified item.
This function constructs an SQL query to create an ARIMA_PLUS_XREG
model in BigQuery, tailored for the provided item and its associated
@@ -299,11 +300,21 @@
Functionsstr, default"total_amount_sold") β The column name representing the time series data.
model_name (str, default"bqmlforecast.arima_plus_xreg_model") β The base name for the model.
train_table_name (str, default"bqmlforecast.training_data") β The name of the table containing training data.
-
test_table_name (str, default"bqmlforecast.test_data") β The name of the table containing test data.
-
holiday_region (str, default"US") β The holiday region to be used by the model.
-
auto_arima (bool, defaultTrue) β Whether to enable AUTO_ARIMA.
-
adjust_step_changes (bool, defaultTrue) β Whether to adjust for step changes in the data.
-
clean_spikes_and_dips (bool, defaultTrue) β Whether to clean spikes and dips in the data.
+
test_table_name (str|None, default"bqmlforecast.test_data") β The name of the table containing test data.
Create ARIMA_PLUS_XREG models for a list of items.
-
This function generates and executes a CREATE MODEL query
+iowa_forecast.ml_train.create_models_for_items(client:bigquery.Client, items_list:List[str], max_items:int|None=None, timestamp_col:str='date', time_series_data_col:str='total_amount_sold', model_name:str='bqmlforecast.arima_plus_xreg_model', train_table_name:str='bqmlforecast.training_data', test_table_name:str|None='bqmlforecast.test_data', **kwargs)→None[source]¶
+
Create 'ARIMA_PLUS_XREG' models for a list of items.
+
This function generates and executes a 'CREATE MODEL' query
for each item in the provided list. The models are created
using the specified training and test tables in BigQuery.
@@ -371,11 +433,23 @@
Functionsstr, default"total_amount_sold") β The column name representing the time series data.
model_name (str, default"bqmlforecast.arima_plus_xreg_model") β The base name for the models.
train_table_name (str, default"bqmlforecast.training_data") β The name of the table containing training data.
-
test_table_name (str, default"bqmlforecast.test_data") β The name of the table containing test data.
-
holiday_region (str, default"US") β The holiday region to be used by the models.
-
auto_arima (bool, defaultTrue) β Whether to enable AUTO_ARIMA.
-
adjust_step_changes (bool, defaultTrue) β Whether to adjust for step changes in the data.
-
clean_spikes_and_dips (bool, defaultTrue) β Whether to clean spikes and dips in the data.
+
test_table_name (str|None, default"bqmlforecast.test_data") β The name of the table containing test data.
+If None, then only the data from train_table_name is used for
+training the model. See the ‘Notes’ section for more information.
Train ARIMA models for a list of columns and store their metrics.
-
This function generates and executes βCREATE MODELβ queries for ARIMA
+
This function generates and executes 'CREATE MODEL' queries for ARIMA
models using the specified columns, and evaluates their performance
by creating tables of model metrics.
These ARIMA models will then be used to generate the future feature values
@@ -412,12 +492,13 @@
This module provides classes for managing and validating configuration parameters
+for various machine learning models, specifically focusing on ARIMA-based models.
+The module includes a base class that standardizes the process of handling model
+configuration, ensuring that all derived classes follow a consistent pattern for
+validating and setting parameters.
An abstract base class for the BaseModelConfig class.
+
+
BaseModelConfigAbstractBaseModelConfig
A base class that provides common functionality for model
+configuration, including parameter validation, default value handling, and
+error checking. Subclasses are required to define a SUPPORTED_PARAMETERS
+dictionary that specifies the expected parameter types, default values,
+and any valid choices.
+
+
ARIMAConfigBaseModelConfig
A configuration class for ARIMA model parameters. Inherits from BaseModelConfig
+and defines specific parameters used by ARIMA and ARIMA_PLUS models. This class
+ensures that the parameters adhere to the expected types and valid choices.
+
+
ARIMA_PLUS_XREG_ConfigBaseModelConfig
A configuration class for ARIMA_PLUS_XREG model parameters. This class extends
+BaseModelConfig and defines the settings specific to the ARIMA_PLUS_XREG
+model, such as non_negative_forecast.
These configuration classes are intended to be used in the setup and validation of
+model parameters before they are passed to machine learning model training functions.
+By leveraging these classes, developers can ensure that all configuration parameters
+are correctly typed, are supported by the model, and adhere to expected choices, reducing
+the likelihood of runtime errors.
This abstract property must be implemented by subclasses.
+It should return a dictionary where the keys are parameter names,
+and the values are tuples containing the expected type, default value,
+and a list of valid choices (if any).
This class provides common functionality for handling configuration parameters
+passed via kwargs, including unpacking, validation, and setting default values.
+
Subclasses must define the SUPPORTED_PARAMETERS dictionary, which specifies
+the expected parameter types, default values, and any restricted choices.
This abstract property must be implemented by subclasses.
+It should return a dictionary where the keys are parameter names,
+and the values are tuples containing the expected type, default value,
+and a list of valid choices (if any).
Inherits common functionality from BaseModelConfig and defines specific
+parameters for 'ARIMA' models, including validation of choices for some
+parameters.
This abstract property must be implemented by subclasses.
+It should return a dictionary where the keys are parameter names,
+and the values are tuples containing the expected type, default value,
+and a list of valid choices (if any).
Configuration class for 'ARIMA_PLUS_XREG' model parameters.
+
Inherits common functionality from BaseModelConfig and defines specific
+parameters for 'ARIMA_PLUS_XREG' models, including validation of choices for
+some parameters.
This abstract property must be implemented by subclasses.
+It should return a dictionary where the keys are parameter names,
+and the values are tuples containing the expected type, default value,
+and a list of valid choices (if any).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/api_reference/iowa_forecast/plots.html b/api_reference/iowa_forecast/plots.html
index 7d0a2db..fda509d 100644
--- a/api_reference/iowa_forecast/plots.html
+++ b/api_reference/iowa_forecast/plots.html
@@ -3,7 +3,7 @@
-
+
plots - Iowa Liquor Sales Forecast 0.0.1 documentation
@@ -209,6 +209,7 @@
If one argument is provided, it should be a combined string specifying
+the offset, e.g., '2Y3M2W1D'.
+
If two arguments are provided, they should be n (int) and freq (str).
+
+
+
freq (str {'days','weeks','months','years'}, optional) – The frequency type. Valid options are 'days', 'weeks', 'months', 'years'.
+Ignored if a single combined offset string is provided.
+
+
+
Returns:
+
pd.DateOffset – A pandas.DateOffset object for the specified frequency and value.
+
+
Raises:
+
ValueError – If freq is not one of the valid options or if the combined string is invalid.
max_items (int or None, default None) – Maximum number of items to process. If None, all items are processed.
See the ‘Notes’ section for more information.
This module provides functions for handling date-related operations
-on DataFrames and for visualizing time series data, including historical,
-forecast, and actual values. It supports both Matplotlib and Plotly
-as plotting engines, offering flexibility in visualization options.
convert_to_datetime: convert a column in a DataFrame to datetime format.
-
filter_by_date: filter a DataFrame by a start date.
-
plot_historical_and_forecast: plot historical data with optional forecast and actual values.
-
-
-
Notes
-
This module is designed to assist in the preparation and visualization
-of time series data. The plot_historical_and_forecast function is
-particularly useful for comparing historical data with forecasted
-and actual values, with options to highlight peaks and add custom
-plot elements using either Matplotlib or Plotly.