diff --git a/.doctrees/api_reference/index.doctree b/.doctrees/api_reference/index.doctree index 8386f08..38cf424 100644 Binary files a/.doctrees/api_reference/index.doctree and b/.doctrees/api_reference/index.doctree differ diff --git a/.doctrees/api_reference/iowa_forecast/index.doctree b/.doctrees/api_reference/iowa_forecast/index.doctree index 19020ee..6f66ef6 100644 Binary files a/.doctrees/api_reference/iowa_forecast/index.doctree and b/.doctrees/api_reference/iowa_forecast/index.doctree differ diff --git a/.doctrees/api_reference/iowa_forecast/load_data.doctree b/.doctrees/api_reference/iowa_forecast/load_data.doctree index 3f96d72..84d6734 100644 Binary files a/.doctrees/api_reference/iowa_forecast/load_data.doctree and b/.doctrees/api_reference/iowa_forecast/load_data.doctree differ diff --git a/.doctrees/api_reference/iowa_forecast/ml_train.doctree b/.doctrees/api_reference/iowa_forecast/ml_train.doctree index f867da9..e6c5056 100644 Binary files a/.doctrees/api_reference/iowa_forecast/ml_train.doctree and b/.doctrees/api_reference/iowa_forecast/ml_train.doctree differ diff --git a/.doctrees/api_reference/iowa_forecast/models_configs.doctree b/.doctrees/api_reference/iowa_forecast/models_configs.doctree new file mode 100644 index 0000000..33b382e Binary files /dev/null and b/.doctrees/api_reference/iowa_forecast/models_configs.doctree differ diff --git a/.doctrees/api_reference/iowa_forecast/utils.doctree b/.doctrees/api_reference/iowa_forecast/utils.doctree index 807728e..2479c99 100644 Binary files a/.doctrees/api_reference/iowa_forecast/utils.doctree and b/.doctrees/api_reference/iowa_forecast/utils.doctree differ diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle index 9ad30ad..643c125 100644 Binary files a/.doctrees/environment.pickle and b/.doctrees/environment.pickle differ diff --git a/.doctrees/iowa_forecast.doctree b/.doctrees/iowa_forecast.doctree index e9bc1f7..cd324f8 100644 Binary files a/.doctrees/iowa_forecast.doctree 
and b/.doctrees/iowa_forecast.doctree differ diff --git a/.doctrees/modules.doctree b/.doctrees/modules.doctree index 9a86943..3a96538 100644 Binary files a/.doctrees/modules.doctree and b/.doctrees/modules.doctree differ diff --git a/_modules/index.html b/_modules/index.html index 287f4a8..7c89201 100644 --- a/_modules/index.html +++ b/_modules/index.html @@ -208,6 +208,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • @@ -251,6 +252,7 @@

    All modules for which code is available

    diff --git a/_modules/iowa_forecast/load_data.html b/_modules/iowa_forecast/load_data.html index abf42c5..ee4421b 100644 --- a/_modules/iowa_forecast/load_data.html +++ b/_modules/iowa_forecast/load_data.html @@ -208,6 +208,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • @@ -306,54 +307,22 @@

    Source code for iowa_forecast.load_data

     from google.cloud import bigquery
     from rich.progress import track
     
    -from iowa_forecast.utils import list_tables_with_pattern
    +from iowa_forecast.utils import list_tables_with_pattern, date_offset
     
     
    -
    [docs]def date_offset(n: int, freq: str) -> pd.DateOffset: - """Generate a pandas DateOffset based on the given frequency and value. +
    [docs]def get_item_names_filter(items_list: List[str] | str) -> str: + """ + Generate a `"WHERE"` clause component to filter values from column `"item_name"`. Parameters ---------- - n : int - The number of time units for the offset. - freq : str {'days', 'weeks', 'months', 'years'} - The frequency type. Valid options are 'days', 'weeks', 'months', 'years'. - - Returns - ------- - pd.DateOffset - A DateOffset object for the specified frequency and value. - - Raises - ------ - ValueError - If `freq` is not one of the valid options. - """ - if freq == "days": - return pd.DateOffset(days=n) - if freq == "weeks": - return pd.DateOffset(weeks=n) - if freq == "months": - return pd.DateOffset(months=n) - if freq == "years": - return pd.DateOffset(years=n) - raise ValueError( - f"The specified `freq` {freq} is not a valid frequency. " - "Valid frequencies are: 'days', 'weeks', 'months', 'years'." - )
    - - -
    [docs]def get_item_names_filter(items_list: List[str] | str) -> str: - """ - Generate a "WHERE" clause component to filter values from column `"item_name"`. - items_list : List[str] | str - Item name or names to add to the "WHERE" clause component. + Item name or names to add to the `"WHERE"` clause component. Returns ------- str - The "WHERE" clause component that can be used to filter values from column `"item_name"`. + The `"WHERE"` clause component that can be used to filter values from column `"item_name"`. Examples -------- @@ -369,17 +338,19 @@

    Source code for iowa_forecast.load_data

         return "(" + " OR ".join(f'item_name = "{item_name}"' for item_name in items_list) + ")"
    -
    [docs]def get_min_datapoints_filter(min_size: int) -> str: +
    [docs]def get_min_datapoints_filter(min_size: int) -> str: """ - Generate a "WHERE" clause to filter items that have at least `min_size` observations. + Generate a `"WHERE"` clause to filter items that have at least `min_size` observations. + Parameters + ---------- min_size : int - Minimum number of observations to use as value for the "WHERE" clause. + Minimum number of observations to use as value for the `"WHERE"` clause. Returns ------- str - The "WHERE" clause component. + The `"WHERE"` clause component. """ return f""" WHERE @@ -388,7 +359,7 @@

    Source code for iowa_forecast.load_data

         """
    -
    [docs]def get_training_data( +
    [docs]def get_training_data( client: bigquery.Client, table_name: str = 'bqmlforecast.training_data', start_date: str | None = None, @@ -619,7 +590,7 @@

    Source code for iowa_forecast.load_data

         return client.query(f"SELECT * FROM `{table_name}`").to_dataframe()
    -
    [docs]def get_year_weather_query(year: int, state: str = "IA") -> str: +
    [docs]def get_year_weather_query(year: int, state: str = "IA") -> str: """ Generate an SQL query to retrieve weather data for a specific year and state. @@ -650,7 +621,7 @@

    Source code for iowa_forecast.load_data

         """
    -
    [docs]def get_weather_query(start_date: str, end_date: str, state: str = "IA") -> str: +
    [docs]def get_weather_query(start_date: str, end_date: str, state: str = "IA") -> str: """ Generate an SQL query to retrieve weather data for a given date range. @@ -673,7 +644,7 @@

    Source code for iowa_forecast.load_data

         return " UNION ALL ".join(weather_year_queries)
    -
    [docs]def create_forecast_features_query( +
    [docs]def create_forecast_features_query( client: bigquery.Client, dataset_id: str = "bqmlforecast", forecast_tables_pattern: str = "forecast_*", @@ -773,7 +744,7 @@

    Source code for iowa_forecast.load_data

         return query
    -
    [docs]def create_future_data( +
    [docs]def create_future_data( client: bigquery.Client, train_table_name: str = "bqmlforecast.training_data", test_table_name: str = "bqmlforecast.test_data", @@ -950,7 +921,7 @@

    Source code for iowa_forecast.load_data

         future_data_job.result()
    -
    [docs]def create_future_feature_table( +
    [docs]def create_future_feature_table( client: bigquery.Client, table_name: str, model_name: str, @@ -986,7 +957,7 @@

    Source code for iowa_forecast.load_data

         create_table_job.result()
    -
    [docs]def create_future_feature_tables( +
    [docs]def create_future_feature_tables( client: bigquery.Client, columns: List[str], model: str = "bqmlforecast.arima_model", diff --git a/_modules/iowa_forecast/ml_eval.html b/_modules/iowa_forecast/ml_eval.html index c08c42e..7dee987 100644 --- a/_modules/iowa_forecast/ml_eval.html +++ b/_modules/iowa_forecast/ml_eval.html @@ -208,6 +208,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • @@ -306,7 +307,7 @@

    Source code for iowa_forecast.ml_eval

     from iowa_forecast.utils import normalize_item_name
     
     
    -
    [docs]def evaluate_models( +
    [docs]def evaluate_models( client: bigquery.Client, items_list: List[str], end_date: str | None = None, @@ -377,7 +378,7 @@

    Source code for iowa_forecast.ml_eval

         return eval_df
    -
    [docs]def get_data(client: bigquery.Client, query: str) -> pd.DataFrame: +
    [docs]def get_data(client: bigquery.Client, query: str) -> pd.DataFrame: """ Execute a BigQuery SQL query and return the result as a DataFrame. @@ -397,7 +398,7 @@

    Source code for iowa_forecast.ml_eval

         return query_job.to_dataframe()
    -
    [docs]def create_query( +
    [docs]def create_query( table: str, item_name: str, date_filter: str | None = None, @@ -431,7 +432,7 @@

    Source code for iowa_forecast.ml_eval

         return query
    -
    [docs]def get_train_data( +
    [docs]def get_train_data( client: bigquery.Client, item_name: str, table_name: str = "bqmlforecast.training_data", @@ -468,7 +469,7 @@

    Source code for iowa_forecast.ml_eval

         return get_data(client, query)
    -
    [docs]def get_actual_data( +
    [docs]def get_actual_data( client: bigquery.Client, item_name: str, end_date: str, @@ -503,7 +504,7 @@

    Source code for iowa_forecast.ml_eval

         return get_data(client, query).astype({"date": str})
    -
    [docs]def get_predictions( +
    [docs]def get_predictions( client: bigquery.Client, item_name: str, end_date: str, @@ -556,7 +557,7 @@

    Source code for iowa_forecast.ml_eval

         return predictions_df
    -
    [docs]def evaluate_predictions( +
    [docs]def evaluate_predictions( client: bigquery.Client, item_name: str, end_date: str | None = None, @@ -658,7 +659,7 @@

    Source code for iowa_forecast.ml_eval

         return train_df, forecast_df
    -
    [docs]def multi_evaluate_predictions( +
    [docs]def multi_evaluate_predictions( client: bigquery.Client, items_list: List[str], end_date: str | None = None, @@ -730,7 +731,7 @@

    Source code for iowa_forecast.ml_eval

         return results_dict
    -
    [docs]def explain_model( +
    [docs]def explain_model( client: bigquery.Client, item_name: str, table_name: str = "bqmlforecast.training_data", diff --git a/_modules/iowa_forecast/ml_train.html b/_modules/iowa_forecast/ml_train.html index 4b8d519..62a0d21 100644 --- a/_modules/iowa_forecast/ml_train.html +++ b/_modules/iowa_forecast/ml_train.html @@ -208,6 +208,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • @@ -289,20 +290,18 @@

    Source code for iowa_forecast.ml_train

     from google.cloud import bigquery  # pylint: disable=no-name-in-module
     from rich.progress import track
     
    +from iowa_forecast.models_configs import ARIMA_PLUS_XREG_Config, ARIMAConfig
     from iowa_forecast.utils import normalize_item_name
     
     
    -
    [docs]def create_model_query( # pylint: disable=too-many-arguments +
    [docs]def create_model_query( # pylint: disable=too-many-arguments item_name: str, timestamp_col: str = "date", time_series_data_col: str = "total_amount_sold", model_name: str = "bqmlforecast.arima_plus_xreg_model", train_table_name: str = "bqmlforecast.training_data", test_table_name: str = "bqmlforecast.test_data", - holiday_region: str = "US", - auto_arima: bool = True, - adjust_step_changes: bool = True, - clean_spikes_and_dips: bool = True, + **kwargs, ) -> str: """ Generate a BigQuery 'CREATE MODEL' query for a specified item. @@ -323,33 +322,39 @@

    Source code for iowa_forecast.ml_train

             The base name for the model.
         train_table_name : str, default="bqmlforecast.training_data"
             The name of the table containing training data.
    -    test_table_name : str, default="bqmlforecast.test_data"
    +    test_table_name : str | None, default="bqmlforecast.test_data"
             The name of the table containing test data.
    -    holiday_region : str, default="US"
    -        The holiday region to be used by the model.
    -    auto_arima : bool, default=True
    -        Whether to enable AUTO_ARIMA.
    -    adjust_step_changes : bool, default=True
    -        Whether to adjust for step changes in the data.
    -    clean_spikes_and_dips : bool, default=True
    -        Whether to clean spikes and dips in the data.
    +    **kwargs : Any
    +        Additional keyword arguments such as:
    +
    +            holiday_region : str, default="US"
    +                The holiday region to be used by the model.
    +            auto_arima : bool, default=True
    +                Whether to enable AUTO_ARIMA.
    +            adjust_step_changes : bool, default=True
    +                Whether to adjust for step changes in the data.
    +            clean_spikes_and_dips : bool, default=True
    +                Whether to clean spikes and dips in the data.
     
         Returns
         -------
         str
             A SQL query string for creating the specified model.
         """
    +    configs = ARIMA_PLUS_XREG_Config(**kwargs)
         item_name_norm = normalize_item_name(item_name)
    +    test_table_query = include_test_on_model_train(item_name, timestamp_col,
    +                                                   train_table_name, test_table_name)
         return f"""
         CREATE OR REPLACE MODEL `{model_name}_{item_name_norm}`
         OPTIONS(
           MODEL_TYPE='ARIMA_PLUS_XREG',
           TIME_SERIES_TIMESTAMP_COL='{timestamp_col}',
           TIME_SERIES_DATA_COL='{time_series_data_col}',
    -      HOLIDAY_REGION='{holiday_region}',
    -      AUTO_ARIMA={auto_arima},
    -      ADJUST_STEP_CHANGES={adjust_step_changes},
    -      CLEAN_SPIKES_AND_DIPS={clean_spikes_and_dips}
    +      HOLIDAY_REGION='{configs.holiday_region}',
    +      AUTO_ARIMA={configs.auto_arima},
    +      ADJUST_STEP_CHANGES={configs.adjust_step_changes},
    +      CLEAN_SPIKES_AND_DIPS={configs.clean_spikes_and_dips}
         ) AS
         SELECT
             *
    @@ -357,6 +362,44 @@ 

    Source code for iowa_forecast.ml_train

             `{train_table_name}`
         WHERE
             item_name = "{item_name}"
    +    {test_table_query}
    +    ORDER BY
    +        date
    +    """
    + + +
    [docs]def include_test_on_model_train( + item_name: str, + timestamp_col: str, + train_table_name: str, + test_table_name: str | None = None, +) -> str: + """ + Include test data in the model training process. + + This function generates an SQL query component to union test data with + training data if a test table is specified. + + Parameters + ---------- + item_name : str + The name of the item being modeled. + timestamp_col : str + The column name representing the timestamp in the dataset. + train_table_name : str + The name of the table containing training data. + test_table_name : str or None, optional + The name of the table containing test data. If None, no test data + is included. + + Returns + ------- + str + An SQL query string component to include test data. + """ + if not isinstance(test_table_name, str): + return "" + return f""" UNION ALL ( SELECT @@ -383,12 +426,75 @@

    Source code for iowa_forecast.ml_train

                         AND t2.item_name = "{item_name}"
                 )
             )
    -    ORDER BY
    -        date
         """
    -
    [docs]def execute_query_with_retries( +
    [docs]def include_test_on_arima_model_train( + column: str, + time_series_timestamp_col: str, + time_series_id_col: str, + train_table_name: str, + test_table_name: str | None = None, +) -> str: + """ + Include test data in the uni-variate ARIMA model training process. + + This function generates an SQL query component to union test data with + training data if a test table is specified. + + Parameters + ---------- + column : str + The name of the feature being modeled. + time_series_timestamp_col : str + The column name representing the timestamp in the dataset. + time_series_id_col : str + The column name representing the identifier. + train_table_name : str + The name of the table containing training data. + test_table_name : str or None, optional + The name of the table containing test data. If None, no test data + is included. + + Returns + ------- + str + An SQL query string component to include test data. + """ + if not isinstance(test_table_name, str): + return "" + return f""" + UNION ALL + ( + SELECT + * + FROM ( + SELECT + t2.{time_series_timestamp_col}, + t2.{column}, + t2.{time_series_id_col} + FROM + `{test_table_name}` AS t2 + JOIN + ( + SELECT + {time_series_id_col}, + MAX({time_series_timestamp_col}) AS max_date + FROM + `{train_table_name}` + GROUP BY + {time_series_id_col} + ) AS md + ON + t2.{time_series_id_col} = md.{time_series_id_col} + WHERE + t2.{time_series_timestamp_col} > md.max_date + ) + ) + """
    + + +
    [docs]def execute_query_with_retries( client: bigquery.Client, query: str, max_retries: int = 3, @@ -442,7 +548,7 @@

    Source code for iowa_forecast.ml_train

                 time.sleep(sleep_time)
    -
    [docs]def create_models_for_items( # pylint: disable=too-many-arguments +
    [docs]def create_models_for_items( # pylint: disable=too-many-arguments client: bigquery.Client, items_list: List[str], max_items: int | None = None, @@ -450,16 +556,13 @@

    Source code for iowa_forecast.ml_train

         time_series_data_col: str = "total_amount_sold",
         model_name: str = "bqmlforecast.arima_plus_xreg_model",
         train_table_name: str = "bqmlforecast.training_data",
    -    test_table_name: str = "bqmlforecast.test_data",
    -    holiday_region: str = "US",
    -    auto_arima: bool = True,
    -    adjust_step_changes: bool = True,
    -    clean_spikes_and_dips: bool = True,
    +    test_table_name: str | None = "bqmlforecast.test_data",
    +    **kwargs,
     ) -> None:
         """
    -    Create ARIMA_PLUS_XREG models for a list of items.
    +    Create `'ARIMA_PLUS_XREG'` models for a list of items.
     
    -    This function generates and executes a CREATE MODEL query
    +    This function generates and executes a `'CREATE MODEL'` query
         for each item in the provided list. The models are created
         using the specified training and test tables in BigQuery.
     
    @@ -480,16 +583,21 @@ 

    Source code for iowa_forecast.ml_train

             The base name for the models.
         train_table_name : str, default="bqmlforecast.training_data"
             The name of the table containing training data.
    -    test_table_name : str, default="bqmlforecast.test_data"
    +    test_table_name : str | None, default="bqmlforecast.test_data"
             The name of the table containing test data.
    -    holiday_region : str, default="US"
    -        The holiday region to be used by the models.
    -    auto_arima : bool, default=True
    -        Whether to enable AUTO_ARIMA.
    -    adjust_step_changes : bool, default=True
    -        Whether to adjust for step changes in the data.
    -    clean_spikes_and_dips : bool, default=True
    -        Whether to clean spikes and dips in the data.
    +        If `None`, then only the data from `train_table_name` is used for
    +        training the model. See the 'Notes' section for more information.
    +    **kwargs : Any
    +        Additional keyword arguments such as:
    +
    +            holiday_region : str, default="US"
    +                The holiday region to be used by the models.
    +            auto_arima : bool, default=True
    +                Whether to enable `'AUTO_ARIMA'`.
    +            adjust_step_changes : bool, default=True
    +                Whether to adjust for step changes in the data.
    +            clean_spikes_and_dips : bool, default=True
    +                Whether to clean spikes and dips in the data.
     
         Notes
         -----
    @@ -502,6 +610,13 @@ 

    Source code for iowa_forecast.ml_train

     
             If using a Google Cloud account with billing enabled, running this
             code might incur charges.
    +
    +    If you are evaluating the model, you shouldn't use all available data
    +    to train the model. Therefore, if you're evaluating the model, consider
    +    setting the parameter `test_table_name` to `None`. Doing so will cause
    +    the model to be trained using only the specified data from the
    +    `train_table_name` which in turn will allow you to use the data from
    +    `test_table_name` for evaluation.
         """
         _items_list = (
             items_list if not isinstance(max_items, int) else items_list[:max_items]
    @@ -514,30 +629,29 @@ 

    Source code for iowa_forecast.ml_train

                 model_name,
                 train_table_name,
                 test_table_name,
    -            holiday_region,
    -            auto_arima,
    -            adjust_step_changes,
    -            clean_spikes_and_dips,
    +            **kwargs,
             )
             execute_query_with_retries(client, query)
    -
    [docs]def train_arima_models( # pylint: disable=too-many-locals, too-many-arguments +
    [docs]def train_arima_models( # pylint: disable=too-many-locals, too-many-arguments client: bigquery.Client, columns: List[str], model: str = "bqmlforecast.arima_model", train_table_name: str = "bqmlforecast.training_data", - test_table_name: str = "bqmlforecast.test_data", + test_table_name: str | None = "bqmlforecast.test_data", model_metrics_table_name: str | None = "bqmlforecast.arima_model_metrics", time_series_timestamp_col: str = "date", time_series_id_col: str = "item_name", - confidence_level=0.9, - horizon=7, + confidence_level: float = 0.9, + horizon: int = 7, + use_test_data_on_train: bool = True, + **kwargs, ): """ Train ARIMA models for a list of columns and store their metrics. - This function generates and executes 'CREATE MODEL' queries for ARIMA + This function generates and executes `'CREATE MODEL'` queries for ARIMA models using the specified columns, and evaluates their performance by creating tables of model metrics. @@ -554,7 +668,7 @@

    Source code for iowa_forecast.ml_train

             The base name for the ARIMA models.
         train_table_name : str, default="bqmlforecast.training_data"
             The name of the table containing training data.
    -    test_table_name : str, default="bqmlforecast.test_data"
    +    test_table_name : str | None, default="bqmlforecast.test_data"
             The name of the table containing test data.
         model_metrics_table_name : str or None, default="bqmlforecast.arima_model_metrics"
             The base name for the tables where model metrics will be stored.
    @@ -566,25 +680,37 @@ 

    Source code for iowa_forecast.ml_train

             The confidence level used in the model evaluation.
         horizon : int, default=7
             The number of time steps (days) to forecast.
    -
    +    use_test_data_on_train : bool, default=True
    +        Whether to use test data during model training.
         """
    +    config = ARIMAConfig(**kwargs)
    +
         for column in track(columns, description="Creating ARIMA models..."):
             model_name = f"{model}_{column}"
    +        test_data_query = ""
    +        if use_test_data_on_train:
    +            test_data_query = include_test_on_arima_model_train(
    +                column,
    +                time_series_timestamp_col,
    +                time_series_id_col,
    +                train_table_name,
    +                test_table_name,
    +            )
             train_arima_query = f"""
             CREATE OR REPLACE MODEL `{model_name}`
             OPTIONS(
    -            MODEL_TYPE = 'ARIMA_PLUS',
    -            AUTO_ARIMA = TRUE,
    +            MODEL_TYPE = '{config.model_type}',
    +            AUTO_ARIMA = {config.auto_arima},
                 HORIZON = {horizon},
                 TIME_SERIES_TIMESTAMP_COL = '{time_series_timestamp_col}',
                 TIME_SERIES_DATA_COL = '{column}',
                 TIME_SERIES_ID_COL = '{time_series_id_col}',
    -            FORECAST_LIMIT_LOWER_BOUND = 0,
    -            DECOMPOSE_TIME_SERIES = TRUE,
    -            HOLIDAY_REGION = 'US',
    -            DATA_FREQUENCY = 'AUTO_FREQUENCY',
    -            ADJUST_STEP_CHANGES = TRUE,
    -            CLEAN_SPIKES_AND_DIPS = TRUE
    +            FORECAST_LIMIT_LOWER_BOUND = {config.forecast_limit_lower_bound},
    +            DECOMPOSE_TIME_SERIES = {config.decompose_time_series},
    +            HOLIDAY_REGION = '{config.holiday_region}',
    +            DATA_FREQUENCY = '{config.data_frequency}',
    +            ADJUST_STEP_CHANGES = {config.adjust_step_changes},
    +            CLEAN_SPIKES_AND_DIPS = {config.clean_spikes_and_dips}
             ) AS
             SELECT
                 {time_series_timestamp_col},
    @@ -592,33 +718,7 @@ 

    Source code for iowa_forecast.ml_train

                 {time_series_id_col}
             FROM
                 `{train_table_name}`
    -        UNION ALL
    -            (
    -                SELECT
    -                    *
    -                FROM (
    -                    SELECT
    -                        t2.{time_series_timestamp_col},
    -                        t2.{column},
    -                        t2.{time_series_id_col}
    -                    FROM
    -                        `{test_table_name}` AS  t2
    -                    JOIN
    -                        (
    -                            SELECT
    -                                {time_series_id_col},
    -                                MAX({time_series_timestamp_col}) AS max_date
    -                            FROM
    -                                `{train_table_name}`
    -                            GROUP BY
    -                                {time_series_id_col}    
    -                        ) AS md
    -                    ON
    -                        t2.{time_series_id_col} = md.{time_series_id_col}
    -                    WHERE
    -                        t2.{time_series_timestamp_col} > md.max_date
    -                )
    -            )
    +        {test_data_query}
             """
             train_arima_job = client.query(train_arima_query)
             train_arima_job.result()
    diff --git a/_modules/iowa_forecast/models_configs.html b/_modules/iowa_forecast/models_configs.html
    new file mode 100644
    index 0000000..8681a4f
    --- /dev/null
    +++ b/_modules/iowa_forecast/models_configs.html
    @@ -0,0 +1,457 @@
    +
    +
    +  
    +    
    +    
    +
    +    
    +        iowa_forecast.models_configs - Iowa Liquor Sales Forecast 0.0.1 documentation
    +      
    +    
    +    
    +    
    +    
    +    
    +    
    +
    +
    +
    +  
    +    
    +    
    +    
    +
    +
    +  
    +    Contents
    +    
    +      
    +    
    +  
    +  
    +    Menu
    +    
    +      
    +      
    +      
    +    
    +  
    +  
    +    Expand
    +    
    +      
    +    
    +  
    +  
    +    Light mode
    +    
    +      
    +      
    +      
    +      
    +      
    +      
    +      
    +      
    +      
    +    
    +  
    +  
    +    Dark mode
    +    
    +      
    +      
    +    
    +  
    +  
    +    Auto light/dark, in light mode
    +    
    +      
    +      
    +      
    +      
    +      
    +      
    +      
    +      
    +      
    +      
    +    
    +  
    +  
    +    Auto light/dark, in dark mode
    +    
    +      
    +      
    +      
    +      
    +      
    +      
    +      
    +      
    +      
    +      
    +    
    +  
    +  
    +    
    +      
    +      
    +      
    +      
    +    
    +  
    +  
    +    
    +      
    +      
    +      
    +      
    +      
    +    
    +  
    +
    +
    +
    +
    +
    +
    +
    +Skip to content
    +
    +
    +
    +
    +
    +
    + +
    + +
    +
    + +
    + +
    +
    + +
    +
    +
    + + + + + Back to top + +
    +
    + +
    + +
    +
    +

    Source code for iowa_forecast.models_configs

    +"""
    +This module provides classes for managing and validating configuration parameters
    +for various machine learning models, specifically focusing on ARIMA-based models.
    +The module includes a base class that standardizes the process of handling model
    +configuration, ensuring that all derived classes follow a consistent pattern for
    +validating and setting parameters.
    +
    +Classes
    +-------
    +AbstractBaseModelConfig : ABC
    +    An abstract base class for the `BaseModelConfig` class.
    +
    +BaseModelConfig : AbstractBaseModelConfig
    +    A base class that provides common functionality for model
    +    configuration, including parameter validation, default value handling, and
    +    error checking. Subclasses are required to define a `SUPPORTED_PARAMETERS`
    +    dictionary that specifies the expected parameter types, default values,
    +    and any valid choices.
    +
    +ARIMAConfig : BaseModelConfig
    +    A configuration class for ARIMA model parameters. Inherits from `BaseModelConfig`
    +    and defines specific parameters used by ARIMA and ARIMA_PLUS models. This class
    +    ensures that the parameters adhere to the expected types and valid choices.
    +
    +ARIMA_PLUS_XREG_Config : BaseModelConfig
    +    A configuration class for ARIMA_PLUS_XREG model parameters. This class extends
    +    `BaseModelConfig` and includes additional parameters for handling exogenous
    +    variables (`xreg_features`) and other settings specific to the `ARIMA_PLUS_XREG` model.
    +
    +Usage
    +-----
    +These configuration classes are intended to be used in the setup and validation of
    +model parameters before they are passed to machine learning model training functions.
    +By leveraging these classes, developers can ensure that all configuration parameters
    +are correctly typed, fall within valid ranges, and adhere to expected choices, reducing
    +the likelihood of runtime errors.
    +
    +Example
    +-------
    +>>> config = ARIMAConfig(model_type="ARIMA")
    +>>> print(config.model_type)
    +'ARIMA'
    +
    +>>> xreg_config = ARIMA_PLUS_XREG_Config(
    +...     model_type="ARIMA_PLUS_XREG",
    +...     xreg_features=["feature1", "feature2"],
    +...     non_negative_forecast=True
    +... )
    +>>> print(xreg_config.xreg_features)
    +['feature1', 'feature2']
    +"""
    +from abc import ABC, abstractmethod
    +from typing import Any, Dict, Tuple, List
    +
    +
    +
    [docs]class AbstractBaseModelConfig(ABC): # pylint: disable=too-few-public-methods + """Abstract base class for `BaseModelConfig` configuration class.""" + + @property + @abstractmethod + def SUPPORTED_PARAMETERS(self) -> Dict[ # pylint: disable=invalid-name + str, Tuple[Any, Any, List[Any]] + ]: + """ + This abstract property must be implemented by subclasses. + It should return a dictionary where the keys are parameter names, + and the values are tuples containing the expected type, default value, + and a list of valid choices (if any). + """
    + + +
    [docs]class BaseModelConfig(AbstractBaseModelConfig): + """ + Base class for model configuration parameters. + + This class provides common functionality for handling configuration parameters + passed via kwargs, including unpacking, validation, and setting default values. + + Subclasses must define the `SUPPORTED_PARAMETERS` dictionary, which specifies + the expected parameter types, default values, and any restricted choices. + """ + + @property + def SUPPORTED_PARAMETERS(self) -> Dict[str, Tuple[Any, Any, List[Any]]]: + return {} + + def __init__(self, **kwargs): + self._params = {} + self._validate_and_set_parameters(kwargs) + + def _validate_and_set_parameters(self, kwargs: Dict[str, Any]): + for key, (expected_type, default_value, choices) in self.SUPPORTED_PARAMETERS.items(): + if key in kwargs: + value = kwargs[key] + if not isinstance(value, expected_type): + raise ValueError( + f"Invalid value for parameter '{key}': expected {expected_type.__name__}, " + f"but got {type(value).__name__}." + ) + if choices and value not in choices: + raise ValueError( + f"Invalid value for parameter '{key}': got '{value}', " + f"but expected one of {choices}." + ) + self._params[key] = value + else: + self._params[key] = default_value + + # Identify unsupported parameters + unsupported_params = set(kwargs) - set(self.SUPPORTED_PARAMETERS) + if unsupported_params: + raise ValueError( + f"Unsupported parameters provided: {', '.join(unsupported_params)}. " + "Please check your input." + ) + + def __getattr__(self, name: str) -> Any: + if name in self._params: + return self._params[name] + raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")
    + + +
    class ARIMAConfig(BaseModelConfig):  # pylint: disable=too-few-public-methods
        """
        Parameters accepted by `'ARIMA'` / `'ARIMA_PLUS'` models.

        All parsing and validation logic comes from `BaseModelConfig`; this
        class only declares which parameters exist, their types, their
        defaults and, where applicable, the values they are restricted to.
        """

        @property
        def SUPPORTED_PARAMETERS(self) -> Dict[str, Tuple[Any, Any, List[Any]]]:
            """Return ``{name: (type, default, choices)}`` for `'ARIMA'` models."""
            model_types = ["ARIMA_PLUS", "ARIMA"]
            frequencies = ["AUTO_FREQUENCY", "DAILY", "WEEKLY", "MONTHLY"]
            return dict(
                model_type=(str, "ARIMA_PLUS", model_types),
                auto_arima=(bool, True, []),
                forecast_limit_lower_bound=(int, 0, []),
                clean_spikes_and_dips=(bool, True, []),
                decompose_time_series=(bool, True, []),
                holiday_region=(str, "US", []),
                data_frequency=(str, "AUTO_FREQUENCY", frequencies),
                adjust_step_changes=(bool, True, []),
            )
    + + +
    class ARIMA_PLUS_XREG_Config(BaseModelConfig):  # pylint: disable=invalid-name, too-few-public-methods
        """
        Parameters accepted by `'ARIMA_PLUS_XREG'` models.

        All parsing and validation logic comes from `BaseModelConfig`; this
        class only declares which parameters exist, their types, their
        defaults and, where applicable, the values they are restricted to.
        """

        @property
        def SUPPORTED_PARAMETERS(self) -> Dict[str, Tuple[Any, Any, List[Any]]]:
            """Return ``{name: (type, default, choices)}`` for `'ARIMA_PLUS_XREG'` models."""
            model_types = ["ARIMA_PLUS_XREG"]
            frequencies = ["AUTO_FREQUENCY", "DAILY", "WEEKLY", "MONTHLY"]
            return dict(
                model_type=(str, "ARIMA_PLUS_XREG", model_types),
                auto_arima=(bool, True, []),
                clean_spikes_and_dips=(bool, True, []),
                holiday_region=(str, "US", []),
                data_frequency=(str, "AUTO_FREQUENCY", frequencies),
                adjust_step_changes=(bool, True, []),
                non_negative_forecast=(bool, False, []),
            )
    +
    +
    +
    +
    + + +
    +
    + + Made with Sphinx and @pradyunsg's + + Furo + +
    +
    + +
    +
    + +
    +
    + +
    +
    + + + + + + \ No newline at end of file diff --git a/_modules/iowa_forecast/plots.html b/_modules/iowa_forecast/plots.html index 878103b..ff5789d 100644 --- a/_modules/iowa_forecast/plots.html +++ b/_modules/iowa_forecast/plots.html @@ -208,6 +208,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • @@ -279,7 +280,7 @@

    Source code for iowa_forecast.plots

     from scipy.signal import find_peaks
     
     
    -
    [docs]def convert_to_datetime(dataframe: pd.DataFrame, col: str) -> pd.DataFrame: +
    [docs]def convert_to_datetime(dataframe: pd.DataFrame, col: str) -> pd.DataFrame: """ Convert a specified column in a DataFrame to datetime format. @@ -325,7 +326,7 @@

    Source code for iowa_forecast.plots

         return dataframe
    -
    [docs]def filter_by_date(dataframe: pd.DataFrame, col: str, start_date: str): +
    [docs]def filter_by_date(dataframe: pd.DataFrame, col: str, start_date: str): """Filter a DataFrame by a start date. Parameters @@ -348,7 +349,7 @@

    Source code for iowa_forecast.plots

         return dataframe
    -
    [docs]def plot_series(x_data, y_data, label: str, linestyle: str = "-", **kwargs) -> None: +
    [docs]def plot_series(x_data, y_data, label: str, linestyle: str = "-", **kwargs) -> None: """ Plot a series of data with optional markers. @@ -397,7 +398,7 @@

    Source code for iowa_forecast.plots

             plt.scatter(x_data, y_data, marker=marker, color=color)
    -
    [docs]def plot_historical_and_forecast( # pylint: disable=too-many-arguments, too-many-locals +
    [docs]def plot_historical_and_forecast( # pylint: disable=too-many-arguments, too-many-locals input_timeseries: pd.DataFrame, timestamp_col_name: str, data_col_name: str, diff --git a/_modules/iowa_forecast/utils.html b/_modules/iowa_forecast/utils.html index 379c0c7..0dc6273 100644 --- a/_modules/iowa_forecast/utils.html +++ b/_modules/iowa_forecast/utils.html @@ -208,6 +208,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • @@ -254,6 +255,7 @@

    Source code for iowa_forecast.utils

     from __future__ import annotations
     
     from typing import List, Tuple
    +import re
     
     import pandas as pd
     import fnmatch
    @@ -262,7 +264,7 @@ 

    Source code for iowa_forecast.utils

     from google.cloud import bigquery
     
     
    -
    [docs]def normalize_item_name(item_name: str) -> str: +
    [docs]def normalize_item_name(item_name: str) -> str: """ Convert 'item_name' values to lower case and replace spaces with underscores. @@ -289,7 +291,7 @@

    Source code for iowa_forecast.utils

         return item_name.lower().replace(' ', '_')
    -
    [docs]def split_table_name_info(table_name: str) -> Tuple[str | None, str | None, str]: +
    [docs]def split_table_name_info(table_name: str) -> Tuple[str | None, str | None, str]: """ Extract components from a table name. @@ -326,7 +328,7 @@

    Source code for iowa_forecast.utils

         )
    -
    [docs]def create_bigquery_table_from_pandas( +
    [docs]def create_bigquery_table_from_pandas( client: bigquery.Client, dataframe: pd.DataFrame, table_id: str, @@ -379,7 +381,7 @@

    Source code for iowa_forecast.utils

         load_job.result()
    -
    [docs]def create_dataset_if_not_found( +
    [docs]def create_dataset_if_not_found( client: bigquery.Client, project_id: str | None = None, dataset_name: str = "bqmlforecast", @@ -441,7 +443,7 @@

    Source code for iowa_forecast.utils

                 raise exc
    -
    [docs]def list_tables_with_pattern( +
    [docs]def list_tables_with_pattern( client: bigquery.Client, dataset_id: str, table_pattern: str, @@ -501,6 +503,105 @@

    Source code for iowa_forecast.utils

         ]
     
         return matching_tables
    + + +
    def parse_combined_string(combined: str) -> dict:
        """Parse a combined offset string into its components.

        The components must appear in the order years, months, weeks, days.
        Each one is optional, but at least one has to be present. Unit letters
        are case-insensitive.

        Parameters
        ----------
        combined : str
            A combined string specifying the offset, e.g., `'2Y3M2W1D'`.

        Returns
        -------
        dict
            A dictionary with keys `'years'`, `'months'`, `'weeks'`, `'days'`
            and their corresponding integer values (`0` for absent components).

        Raises
        ------
        ValueError
            If the combined string is invalid, including when it is empty.
        """
        pattern = re.compile(
            r'(?P<years>\d+Y)?(?P<months>\d+M)?(?P<weeks>\d+W)?(?P<days>\d+D)?',
            re.IGNORECASE
        )
        match = pattern.fullmatch(combined)
        # Every group is optional, so the pattern alone also accepts the empty
        # string; require at least one component to have matched.
        if not match or not any(match.groupdict().values()):
            raise ValueError(f"The specified `combined` string {combined} is not valid.")

        # Each captured value is "<digits><unit letter>"; drop the unit letter.
        return {k: int(v[:-1]) if v else 0 for k, v in match.groupdict().items()}
    + + +
    def create_date_offset_from_parts(years=0, months=0, weeks=0, days=0) -> pd.DateOffset:
        """Build a `pandas.DateOffset` out of separate calendar components.

        Parameters
        ----------
        years : int, default=0
            How many years the offset spans.
        months : int, default=0
            How many months the offset spans.
        weeks : int, default=0
            How many weeks the offset spans.
        days : int, default=0
            How many days the offset spans.

        Returns
        -------
        pd.DateOffset
            The offset combining all four components.
        """
        components = {
            "years": years,
            "months": months,
            "weeks": weeks,
            "days": days,
        }
        return pd.DateOffset(**components)
    + + +
    def date_offset(*args: int | str, freq: str | None = None) -> pd.DateOffset:
        """
        Generate a `pandas.DateOffset` based on the given frequency and value or a combined string.

        Supported call forms::

            date_offset("2Y3M2W1D")      # combined string
            date_offset(2, "weeks")      # n and freq, both positional
            date_offset(2, freq="weeks") # n positional, freq as keyword

        Parameters
        ----------
        args : int or str
            * If one string argument is provided, it should be a combined string
              specifying the offset, e.g., `'2Y3M2W1D'`.
            * If two arguments are provided, they should be `n` (int) and `freq` (str).
            * If one integer argument is provided, it is `n` and the frequency
              is taken from the `freq` keyword.
        freq : str {'days', 'weeks', 'months', 'years'}, optional
            The frequency type. Valid options are `'days'`, `'weeks'`, `'months'`,
            `'years'` (singular forms and the initials `'d'`, `'w'`, `'m'`, `'y'`
            are accepted too, case-insensitively). Ignored if a combined string
            is provided or if the frequency is already given positionally.

        Returns
        -------
        pd.DateOffset
            A `pandas.DateOffset` object for the specified frequency and value.

        Raises
        ------
        ValueError
            If `freq` is not one of the valid options, if the combined string is
            invalid, or if the arguments match none of the supported call forms.
        """
        if len(args) == 1 and isinstance(args[0], str):
            offset_parts = parse_combined_string(args[0])
            return create_date_offset_from_parts(**offset_parts)

        if len(args) == 2:
            n, unit = args
        elif len(args) == 1:
            # Honour the keyword form `date_offset(n, freq=...)`; without this
            # branch the documented `freq` keyword would never be used.
            n, unit = args[0], freq
        else:
            n, unit = None, None

        if isinstance(n, int) and isinstance(unit, str):
            unit = unit.lower()
            valid_freqs = {"d": "days", "day": "days", "days": "days",
                           "w": "weeks", "week": "weeks", "weeks": "weeks",
                           "m": "months", "month": "months", "months": "months",
                           "y": "years", "year": "years", "years": "years"}

            if unit not in valid_freqs:
                raise ValueError(f"The specified `freq` {unit} is not a valid frequency. "
                                 "Valid frequencies are: 'days', 'weeks', 'months', 'years'.")

            return create_date_offset_from_parts(**{valid_freqs[unit]: n})

        raise ValueError(
            "Either provide a single combined string or both `n` and `freq` as arguments.")
    diff --git a/_sources/api_reference/index.rst.txt b/_sources/api_reference/index.rst.txt index 6cca042..ee4296a 100644 --- a/_sources/api_reference/index.rst.txt +++ b/_sources/api_reference/index.rst.txt @@ -1,5 +1,5 @@ API Reference -============== +============= This page gives an overview of all public pandas objects, functions and methods. All functions exposed in `iowa_forecast.*` namespace are public. @@ -12,14 +12,17 @@ The `iowa_forecast` package contains the following modules: * `iowa_forecast.ml_train`: BigQuery Model Training and Execution Module. +* `iowa_forecast.models_configs`: Classes for managing and validating + configuration parameters + * `iowa_forecast.plots`: Time Series Plotting and Date Handling Module. * `iowa_forecast.utils`: General utility functions Module. iowa\_forecast -------------- +-------------- .. toctree:: :maxdepth: 2 - iowa_forecast/index \ No newline at end of file + iowa_forecast/index diff --git a/_sources/api_reference/iowa_forecast/index.rst.txt b/_sources/api_reference/iowa_forecast/index.rst.txt index f2f95cc..ac3eae2 100644 --- a/_sources/api_reference/iowa_forecast/index.rst.txt +++ b/_sources/api_reference/iowa_forecast/index.rst.txt @@ -8,5 +8,6 @@ iowa\_forecast load_data ml_eval ml_train + models_configs plots utils diff --git a/_sources/api_reference/iowa_forecast/models_configs.rst.txt b/_sources/api_reference/iowa_forecast/models_configs.rst.txt new file mode 100644 index 0000000..872049d --- /dev/null +++ b/_sources/api_reference/iowa_forecast/models_configs.rst.txt @@ -0,0 +1,7 @@ +models\_configs +--------------- + +.. automodule:: iowa_forecast.models_configs + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/api_reference/index.html b/api_reference/index.html index aba4b8a..cc00c55 100644 --- a/api_reference/index.html +++ b/api_reference/index.html @@ -209,6 +209,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • @@ -263,6 +264,8 @@

    API Referenceiowa_forecast.load_data: BigQuery Data Loading and Feature Engineering Module.

  • iowa_forecast.ml_eval: BigQuery Model Evaluation and Forecasting Module.

  • iowa_forecast.ml_train: BigQuery Model Training and Execution Module.

  • +
  • iowa_forecast.models_configs: Classes for managing and validating +configuration parameters

  • iowa_forecast.plots: Time Series Plotting and Date Handling Module.

  • iowa_forecast.utils: General utility functions Module.

  • @@ -274,6 +277,7 @@

    iowa_forecastload_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • diff --git a/api_reference/iowa_forecast/index.html b/api_reference/iowa_forecast/index.html index 93cf3b5..9fe5c6b 100644 --- a/api_reference/iowa_forecast/index.html +++ b/api_reference/iowa_forecast/index.html @@ -209,6 +209,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • @@ -261,6 +262,7 @@

    iowa_forecastload_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • diff --git a/api_reference/iowa_forecast/load_data.html b/api_reference/iowa_forecast/load_data.html index fb08fbb..700db5e 100644 --- a/api_reference/iowa_forecast/load_data.html +++ b/api_reference/iowa_forecast/load_data.html @@ -209,6 +209,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • @@ -294,43 +295,16 @@

    FunctionsGoogle, BigQuery

    -
    -
    -iowa_forecast.load_data.date_offset(n: int, freq: str) DateOffset[source]ΒΆ
    -

    Generate a pandas DateOffset based on the given frequency and value.

    -
    -
    Parameters:
    -
      -
    • n (int) – The number of time units for the offset.

    • -
    • freq (str {'days', 'weeks', 'months', 'years'}) – The frequency type. Valid options are β€˜days’, β€˜weeks’, β€˜months’, β€˜years’.

    • -
    -
    -
    Returns:
    -

    pd.DateOffset – A DateOffset object for the specified frequency and value.

    -
    -
    Raises:
    -

    ValueError – If freq is not one of the valid options.

    -
    -
    Return type:
    -

    DateOffset

    -
    -
    -
    -
    iowa_forecast.load_data.get_item_names_filter(items_list: List[str] | str) str[source]ΒΆ
    -

    Generate a β€œWHERE” clause component to filter values from column "item_name".

    -
    -
    items_listList[str] | str

    Item name or names to add to the β€œWHERE” clause component.

    -
    -
    +

    Generate a "WHERE" clause component to filter values from column "item_name".

    -
    Returns:
    -

    str – The β€œWHERE” clause component that can be used to filter values from column "item_name".

    +
    Parameters:
    +

    items_list (List[str] | str) – Item name or names to add to the "WHERE" clause component.

    -
    Parameters:
    -

    items_list (List[str] | str) –

    +
    Returns:
    +

    str – The "WHERE" clause component that can be used to filter values from column "item_name".

    Return type:

    str

    @@ -350,17 +324,13 @@

    Functions
    iowa_forecast.load_data.get_min_datapoints_filter(min_size: int) str[source]ΒΆ
    -

    Generate a β€œWHERE” clause to filter items that have at least min_size observations.

    -
    -
    min_sizeint

    Minimum number of observations to use as value for the β€œWHERE” clause.

    -
    -
    +

    Generate a "WHERE" clause to filter items that have at least min_size observations.

    -
    Returns:
    -

    str – The β€œWHERE” clause component.

    +
    Parameters:
    +

    min_size (int) – Minimum number of observations to use as value for the "WHERE" clause.

    -
    Parameters:
    -

    min_size (int) –

    +
    Returns:
    +

    str – The "WHERE" clause component.

    Return type:

    str

    @@ -625,7 +595,6 @@

    Functionsload_data diff --git a/api_reference/iowa_forecast/ml_train.html b/api_reference/iowa_forecast/ml_train.html index 0a90004..1595e58 100644 --- a/api_reference/iowa_forecast/ml_train.html +++ b/api_reference/iowa_forecast/ml_train.html @@ -3,7 +3,7 @@ - + ml_train - Iowa Liquor Sales Forecast 0.0.1 documentation @@ -209,6 +209,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • @@ -286,7 +287,7 @@

    Functions
    -iowa_forecast.ml_train.create_model_query(item_name: str, timestamp_col: str = 'date', time_series_data_col: str = 'total_amount_sold', model_name: str = 'bqmlforecast.arima_plus_xreg_model', train_table_name: str = 'bqmlforecast.training_data', test_table_name: str = 'bqmlforecast.test_data', holiday_region: str = 'US', auto_arima: bool = True, adjust_step_changes: bool = True, clean_spikes_and_dips: bool = True) str[source]ΒΆ
    +iowa_forecast.ml_train.create_model_query(item_name: str, timestamp_col: str = 'date', time_series_data_col: str = 'total_amount_sold', model_name: str = 'bqmlforecast.arima_plus_xreg_model', train_table_name: str = 'bqmlforecast.training_data', test_table_name: str = 'bqmlforecast.test_data', **kwargs) str[source]ΒΆ

    Generate a BigQuery β€˜CREATE MODEL’ query for a specified item.

    This function constructs an SQL query to create an ARIMA_PLUS_XREG model in BigQuery, tailored for the provided item and its associated @@ -299,11 +300,21 @@

    Functionsstr, default "total_amount_sold") – The column name representing the time series data.

  • model_name (str, default "bqmlforecast.arima_plus_xreg_model") – The base name for the model.

  • train_table_name (str, default "bqmlforecast.training_data") – The name of the table containing training data.

  • -
  • test_table_name (str, default "bqmlforecast.test_data") – The name of the table containing test data.

  • -
  • holiday_region (str, default "US") – The holiday region to be used by the model.

  • -
  • auto_arima (bool, default True) – Whether to enable AUTO_ARIMA.

  • -
  • adjust_step_changes (bool, default True) – Whether to adjust for step changes in the data.

  • -
  • clean_spikes_and_dips (bool, default True) – Whether to clean spikes and dips in the data.

  • +
  • test_table_name (str | None, default "bqmlforecast.test_data") – The name of the table containing test data.

  • +
  • **kwargs (Any) –

    Additional keyword arguments such as:

    +
    +
    +
    holiday_regionstr, default=”US”

    The holiday region to be used by the model.

    +
    +
    auto_arimabool, default=True

    Whether to enable AUTO_ARIMA.

    +
    +
    adjust_step_changesbool, default=True

    Whether to adjust for step changes in the data.

    +
    +
    clean_spikes_and_dipsbool, default=True

    Whether to clean spikes and dips in the data.

    +
    +
    +
    +

  • Returns:
    @@ -315,6 +326,57 @@

    Functions +
    +iowa_forecast.ml_train.include_test_on_model_train(item_name: str, timestamp_col: str, train_table_name: str, test_table_name: str | None = None) str[source]ΒΆ
    +

    Include test data in the model training process.

    +

    This function generates an SQL query component to union test data with +training data if a test table is specified.

    +
    +
    Parameters:
    +
      +
    • item_name (str) – The name of the item being modeled.

    • +
    • timestamp_col (str) – The column name representing the timestamp in the dataset.

    • +
    • train_table_name (str) – The name of the table containing training data.

    • +
    • test_table_name (str or None, optional) – The name of the table containing test data. If None, no test data +is included.

    • +
    +
    +
    Returns:
    +

    str – An SQL query string component to include test data.

    +
    +
    Return type:
    +

    str

    +
    +
    +

    + +
    +
    +iowa_forecast.ml_train.include_test_on_arima_model_train(column: str, time_series_timestamp_col: str, time_series_id_col: str, train_table_name: str, test_table_name: str | None = None) str[source]ΒΆ
    +

    Include test data in the uni-variate ARIMA model training process.

    +

    This function generates an SQL query component to union test data with +training data if a test table is specified.

    +
    +
    Parameters:
    +
      +
    • column (str) – The name of the feature being modeled.

    • +
    • time_series_timestamp_col (str) – The column name representing the timestamp in the dataset.

    • +
    • time_series_id_col (str) – The column name representing the identifier.

    • +
    • train_table_name (str) – The name of the table containing training data.

    • +
    • test_table_name (str or None, optional) – The name of the table containing test data. If None, no test data +is included.

    • +
    +
    +
    Returns:
    +

    str – An SQL query string component to include test data.

    +
    +
    Return type:
    +

    str

    +
    +
    +
    +
    iowa_forecast.ml_train.execute_query_with_retries(client: Client, query: str, max_retries: int = 3) None[source]ΒΆ
    @@ -355,9 +417,9 @@

    Functions
    -iowa_forecast.ml_train.create_models_for_items(client: bigquery.Client, items_list: List[str], max_items: int | None = None, timestamp_col: str = 'date', time_series_data_col: str = 'total_amount_sold', model_name: str = 'bqmlforecast.arima_plus_xreg_model', train_table_name: str = 'bqmlforecast.training_data', test_table_name: str = 'bqmlforecast.test_data', holiday_region: str = 'US', auto_arima: bool = True, adjust_step_changes: bool = True, clean_spikes_and_dips: bool = True) None[source]ΒΆ
    -

    Create ARIMA_PLUS_XREG models for a list of items.

    -

    This function generates and executes a CREATE MODEL query +iowa_forecast.ml_train.create_models_for_items(client: bigquery.Client, items_list: List[str], max_items: int | None = None, timestamp_col: str = 'date', time_series_data_col: str = 'total_amount_sold', model_name: str = 'bqmlforecast.arima_plus_xreg_model', train_table_name: str = 'bqmlforecast.training_data', test_table_name: str | None = 'bqmlforecast.test_data', **kwargs) None[source]ΒΆ +

    Create 'ARIMA_PLUS_XREG' models for a list of items.

    +

    This function generates and executes a 'CREATE MODEL' query for each item in the provided list. The models are created using the specified training and test tables in BigQuery.

    @@ -371,11 +433,23 @@

    Functionsstr, default "total_amount_sold") – The column name representing the time series data.

  • model_name (str, default "bqmlforecast.arima_plus_xreg_model") – The base name for the models.

  • train_table_name (str, default "bqmlforecast.training_data") – The name of the table containing training data.

  • -
  • test_table_name (str, default "bqmlforecast.test_data") – The name of the table containing test data.

  • -
  • holiday_region (str, default "US") – The holiday region to be used by the models.

  • -
  • auto_arima (bool, default True) – Whether to enable AUTO_ARIMA.

  • -
  • adjust_step_changes (bool, default True) – Whether to adjust for step changes in the data.

  • -
  • clean_spikes_and_dips (bool, default True) – Whether to clean spikes and dips in the data.

  • +
  • test_table_name (str | None, default "bqmlforecast.test_data") – The name of the table containing test data. +If None, then only the data from train_table_name is used for +training the model. See the 'Notes' section for more information.

  • +
  • **kwargs (Any) –

    Additional keyword arguments such as:

    +
    +
    +
    holiday_regionstr, default=”US”

    The holiday region to be used by the models.

    +
    +
    auto_arimabool, default=True

    Whether to enable 'AUTO_ARIMA'.

    +
    +
    adjust_step_changesbool, default=True

    Whether to adjust for step changes in the data.

    +
    +
    clean_spikes_and_dipsbool, default=True

    Whether to clean spikes and dips in the data.

    +
    +
    +
    +

  • Return type:
    @@ -393,14 +467,20 @@

    Functionstest_table_name to None. Doing so will cause +the model to be trained using only the specified data from the +train_table_name which in turn will allow you to use the data from +test_table_name for evaluation.

    -iowa_forecast.ml_train.train_arima_models(client: bigquery.Client, columns: List[str], model: str = 'bqmlforecast.arima_model', train_table_name: str = 'bqmlforecast.training_data', test_table_name: str = 'bqmlforecast.test_data', model_metrics_table_name: str | None = 'bqmlforecast.arima_model_metrics', time_series_timestamp_col: str = 'date', time_series_id_col: str = 'item_name', confidence_level=0.9, horizon=7)[source]ΒΆ
    +iowa_forecast.ml_train.train_arima_models(client: bigquery.Client, columns: List[str], model: str = 'bqmlforecast.arima_model', train_table_name: str = 'bqmlforecast.training_data', test_table_name: str | None = 'bqmlforecast.test_data', model_metrics_table_name: str | None = 'bqmlforecast.arima_model_metrics', time_series_timestamp_col: str = 'date', time_series_id_col: str = 'item_name', confidence_level: float = 0.9, horizon: int = 7, use_test_data_on_train: bool = True, **kwargs)[source]ΒΆ

    Train ARIMA models for a list of columns and store their metrics.

    -

    This function generates and executes β€˜CREATE MODEL’ queries for ARIMA +

    This function generates and executes 'CREATE MODEL' queries for ARIMA models using the specified columns, and evaluates their performance by creating tables of model metrics.

    These ARIMA models will then be used to generate the future feature values @@ -412,12 +492,13 @@

    FunctionsList[str]) – List of columns to be used for creating ARIMA models.

  • model (str, default "bqmlforecast.arima_model") – The base name for the ARIMA models.

  • train_table_name (str, default "bqmlforecast.training_data") – The name of the table containing training data.

  • -
  • test_table_name (str, default "bqmlforecast.test_data") – The name of the table containing test data.

  • +
  • test_table_name (str | None, default "bqmlforecast.test_data") – The name of the table containing test data.

  • model_metrics_table_name (str or None, default "bqmlforecast.arima_model_metrics") – The base name for the tables where model metrics will be stored.

  • time_series_timestamp_col (str, default "date") – The column name representing the timestamp in the dataset.

  • time_series_id_col (str, default "item_name") – The column name representing the identifier for the time series.

  • confidence_level (float, default 0.9) – The confidence level used in the model evaluation.

  • horizon (int, default 7) – The number of time steps (days) to forecast.

  • +
  • use_test_data_on_train (bool, default True) – Whether to use test data during model training.

  • @@ -430,12 +511,12 @@

    Functions - +
    Next
    -
    plots
    +
    models_configs
    @@ -483,6 +564,8 @@

    Functionsml_train
    • Functions
    • create_model_query()
    • +
    • include_test_on_model_train()
    • +
    • include_test_on_arima_model_train()
    • execute_query_with_retries()
    • create_models_for_items()
    • train_arima_models()
    • diff --git a/api_reference/iowa_forecast/models_configs.html b/api_reference/iowa_forecast/models_configs.html new file mode 100644 index 0000000..f78884c --- /dev/null +++ b/api_reference/iowa_forecast/models_configs.html @@ -0,0 +1,510 @@ + + + + + + + + + models_configs - Iowa Liquor Sales Forecast 0.0.1 documentation + + + + + + + + + + + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark, in light mode + + + + + + + + + + + + + + + Auto light/dark, in dark mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Skip to content + + + +
      +
      +
      + +
      + +
      +
      + +
      + +
      +
      + +
      +
      +
      + + + + + Back to top + +
      + +
      + +
      + +
      +
      +
      +

      models_configs¶

      +

      This module provides classes for managing and validating configuration parameters +for various machine learning models, specifically focusing on ARIMA-based models. +The module includes a base class that standardizes the process of handling model +configuration, ensuring that all derived classes follow a consistent pattern for +validating and setting parameters.

      +
      +

      Classes¶

      +
      +
      AbstractBaseModelConfigABC

      An abstract base class for the BaseModelConfig class.

      +
      +
      BaseModelConfigAbstractBaseModelConfig

      A base class that provides common functionality for model +configuration, including parameter validation, default value handling, and +error checking. Subclasses are required to define a SUPPORTED_PARAMETERS +dictionary that specifies the expected parameter types, default values, +and any valid choices.

      +
      +
      ARIMAConfigBaseModelConfig

      A configuration class for ARIMA model parameters. Inherits from BaseModelConfig +and defines specific parameters used by ARIMA and ARIMA_PLUS models. This class +ensures that the parameters adhere to the expected types and valid choices.

      +
      +
      ARIMA_PLUS_XREG_ConfigBaseModelConfig

      A configuration class for ARIMA_PLUS_XREG model parameters. This class extends +BaseModelConfig and defines the parameters specific to the ARIMA_PLUS_XREG +model, such as non_negative_forecast.

      +
      +
      +
      +
      +

      Usage¶

      +

      These configuration classes are intended to be used in the setup and validation of +model parameters before they are passed to machine learning model training functions. +By leveraging these classes, developers can ensure that all configuration parameters +are correctly typed, fall within valid ranges, and adhere to expected choices, reducing +the likelihood of runtime errors.

      +
      +

      Example

      +
      >>> config = ARIMAConfig(model_type="ARIMA")
      +>>> print(config.model_type)
      +'ARIMA'
      +
      +
      +
      >>> xreg_config = ARIMA_PLUS_XREG_Config(
      +...     model_type="ARIMA_PLUS_XREG",
      +...     non_negative_forecast=True
      +... )
      +>>> print(xreg_config.non_negative_forecast)
      +True
      +
      +
      +
      +
      +
      +
      +class iowa_forecast.models_configs.AbstractBaseModelConfig[source]ΒΆ
      +

      Bases: ABC

      +

      Abstract base class for BaseModelConfig configuration class.

      +
      +
      Attributes:
      +
      +
      SUPPORTED_PARAMETERS

      This abstract property must be implemented by subclasses.

      +
      +
      +
      +
      +
      +
      +abstract property SUPPORTED_PARAMETERS: Dict[str, Tuple[Any, Any, List[Any]]]ΒΆ
      +

      This abstract property must be implemented by subclasses. +It should return a dictionary where the keys are parameter names, +and the values are tuples containing the expected type, default value, +and a list of valid choices (if any).

      +
      + +
      + +
      +
      +class iowa_forecast.models_configs.BaseModelConfig(**kwargs)[source]ΒΆ
      +

      Bases: AbstractBaseModelConfig

      +

      Base class for model configuration parameters.

      +

      This class provides common functionality for handling configuration parameters +passed via kwargs, including unpacking, validation, and setting default values.

      +

      Subclasses must define the SUPPORTED_PARAMETERS dictionary, which specifies +the expected parameter types, default values, and any restricted choices.

      +
      +
      Attributes:
      +
      +
      SUPPORTED_PARAMETERS

      This abstract property must be implemented by subclasses.

      +
      +
      +
      +
      +
      +
      +property SUPPORTED_PARAMETERS: Dict[str, Tuple[Any, Any, List[Any]]]ΒΆ
      +

      This abstract property must be implemented by subclasses. +It should return a dictionary where the keys are parameter names, +and the values are tuples containing the expected type, default value, +and a list of valid choices (if any).

      +
      + +
      + +
      +
      +class iowa_forecast.models_configs.ARIMAConfig(**kwargs)[source]ΒΆ
      +

      Bases: BaseModelConfig

      +

      Configuration class for 'ARIMA' model parameters.

      +

      Inherits common functionality from BaseModelConfig and defines specific +parameters for 'ARIMA' models, including validation of choices for some +parameters.

      +
      +
      Attributes:
      +
      +
      SUPPORTED_PARAMETERS

      This abstract property must be implemented by subclasses.

      +
      +
      +
      +
      +
      +
      +property SUPPORTED_PARAMETERS: Dict[str, Tuple[Any, Any, List[Any]]]ΒΆ
      +

      This abstract property must be implemented by subclasses. +It should return a dictionary where the keys are parameter names, +and the values are tuples containing the expected type, default value, +and a list of valid choices (if any).

      +
      + +
      + +
      +
      +class iowa_forecast.models_configs.ARIMA_PLUS_XREG_Config(**kwargs)[source]ΒΆ
      +

      Bases: BaseModelConfig

      +

      Configuration class for 'ARIMA_PLUS_XREG' model parameters.

      +

      Inherits common functionality from BaseModelConfig and defines specific +parameters for 'ARIMA_PLUS_XREG' models, including validation of choices for +some parameters.

      +
      +
      Attributes:
      +
      +
      SUPPORTED_PARAMETERS

      This abstract property must be implemented by subclasses.

      +
      +
      +
      +
      +
      +
      +property SUPPORTED_PARAMETERS: Dict[str, Tuple[Any, Any, List[Any]]]¶
      +

      This abstract property must be implemented by subclasses. +It should return a dictionary where the keys are parameter names, +and the values are tuples containing the expected type, default value, +and a list of valid choices (if any).

      +
      + +
      + +
      + +
      +
      + +
      + +
      +
      + + + + + + \ No newline at end of file diff --git a/api_reference/iowa_forecast/plots.html b/api_reference/iowa_forecast/plots.html index 7d0a2db..fda509d 100644 --- a/api_reference/iowa_forecast/plots.html +++ b/api_reference/iowa_forecast/plots.html @@ -3,7 +3,7 @@ - + plots - Iowa Liquor Sales Forecast 0.0.1 documentation @@ -209,6 +209,7 @@
    • load_data
    • ml_eval
    • ml_train
    • +
    • models_configs
    • plots
    • utils
    @@ -467,14 +468,14 @@

    Functions - +
    Previous
    -
    ml_train
    +
    models_configs
    diff --git a/api_reference/iowa_forecast/utils.html b/api_reference/iowa_forecast/utils.html index 4a80c62..5b43c0b 100644 --- a/api_reference/iowa_forecast/utils.html +++ b/api_reference/iowa_forecast/utils.html @@ -209,6 +209,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • @@ -416,6 +417,78 @@

    +
    +
    +iowa_forecast.utils.parse_combined_string(combined: str) dict[source]¶
    +

    Parse a combined offset string into its components.

    +
    +
    Parameters:
    +

    combined (str) – A combined string specifying the offset, e.g., '2Y3M2W1D'.

    +
    +
    Returns:
    +

    dict – A dictionary with keys 'years', 'months', 'weeks', 'days' +and their corresponding values.

    +
    +
    Raises:
    +

    ValueError – If the combined string is invalid.

    +
    +
    Return type:
    +

    dict

    +
    +
    +
    + +
    +
    +iowa_forecast.utils.create_date_offset_from_parts(years=0, months=0, weeks=0, days=0) DateOffset[source]¶
    +

    Create a pandas.DateOffset object from individual time components.

    +
    +
    Parameters:
    +
      +
    • years (int, default 0) – Number of years for the offset.

    • +
    • months (int, default 0) – Number of months for the offset.

    • +
    • weeks (int, default 0) – Number of weeks for the offset.

    • +
    • days (int, default 0) – Number of days for the offset.

    • +
    +
    +
    Returns:
    +

    pd.DateOffset – A pandas.DateOffset object for the specified time components.

    +
    +
    Return type:
    +

    DateOffset

    +
    +
    +
    + +
    +
    +iowa_forecast.utils.date_offset(*args: int | str, freq: str = None) pd.DateOffset[source]¶
    +

    Generate a pandas.DateOffset based on the given frequency and value or a combined string.

    +
    +
    Parameters:
    +
      +
    • args (int or str) –

        +
      • If one argument is provided, it should be a combined string specifying +the offset, e.g., '2Y3M2W1D'.

      • +
      • If two arguments are provided, they should be n (int) and freq (str).

      • +
      +

    • +
    • freq (str {'days', 'weeks', 'months', 'years'}, optional) – The frequency type. Valid options are 'days', 'weeks', 'months', 'years'. +Ignored if a combined string is passed as the single positional argument.

    • +
    +
    +
    Returns:
    +

    pd.DateOffset – A pandas.DateOffset object for the specified frequency and value.

    +
    +
    Raises:
    +

    ValueError – If freq is not one of the valid options or if the combined string is invalid.

    +
    +
    Return type:
    +

    pd.DateOffset

    +
    +
    +
    + @@ -471,6 +544,9 @@
  • create_bigquery_table_from_pandas()
  • create_dataset_if_not_found()
  • list_tables_with_pattern()
  • +
  • parse_combined_string()
  • +
  • create_date_offset_from_parts()
  • +
  • date_offset()
  • diff --git a/docs/html/_static/scripts/furo.js.LICENSE.html b/docs/html/_static/scripts/furo.js.LICENSE.html index 78f0b7f..2dc5354 100644 --- a/docs/html/_static/scripts/furo.js.LICENSE.html +++ b/docs/html/_static/scripts/furo.js.LICENSE.html @@ -209,6 +209,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • diff --git a/genindex.html b/genindex.html index 78a47bd..fbf9484 100644 --- a/genindex.html +++ b/genindex.html @@ -207,6 +207,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • @@ -249,33 +250,61 @@

    Index

    -
    C | D | E | F | G | I | L | M | N | P | S | T
    +
    A | B | C | D | E | F | G | I | L | M | N | P | S | T
    +
    +

    A

    + + + +
    +
    + +
    +

    B

    + + +
    +
    +

    C

    @@ -285,7 +314,7 @@

    C

    D

    @@ -295,15 +324,15 @@

    D

    E

    @@ -313,7 +342,7 @@

    E

    F

    @@ -323,25 +352,25 @@

    F

    G

    @@ -351,48 +380,52 @@

    G

    I

    @@ -403,7 +436,7 @@

    I

    L

    @@ -417,22 +450,22 @@

    M

    module @@ -442,7 +475,7 @@

    M

    N

    @@ -452,11 +485,13 @@

    N

    P

    @@ -466,8 +501,18 @@

    P

    S

    @@ -476,7 +521,7 @@

    S

    T

    diff --git a/index.html b/index.html index 85b1488..409e559 100644 --- a/index.html +++ b/index.html @@ -209,6 +209,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • diff --git a/installation.html b/installation.html index e03601a..23abd91 100644 --- a/installation.html +++ b/installation.html @@ -209,6 +209,7 @@
  • load_data
  • ml_eval
  • ml_train
  • +
  • models_configs
  • plots
  • utils
  • diff --git a/iowa_forecast.html b/iowa_forecast.html index 176481b..e68097b 100644 --- a/iowa_forecast.html +++ b/iowa_forecast.html @@ -339,7 +339,7 @@

    Functions
    Returns:
    -

    str – The “WHERE” clause component that can be used to filter values from column "item_name".

    +

    str – The “WHERE” clause component that can be used to filter values from column `"item_name"`.

    Parameters:

    items_list (List[str] | str) –

    @@ -586,8 +586,8 @@

    Functions -

    Functions¶

    +
    +

    Functions¶

    • evaluate_models: evaluates ‘ARIMA_PLUS_XREG’ models for a list of items, returning performance metrics in a pandas.DataFrame.

    • @@ -872,8 +872,8 @@

      Functions -
      -

      Functions¶

      +
      +

      Functions¶

      • create_model_query: Generate an SQL query to create an ARIMA_PLUS_XREG model for a specified item and its associated data.

      • @@ -966,7 +966,7 @@

        Functions
        -iowa_forecast.ml_train.create_models_for_items(client: bigquery.Client, items_list: List[str], max_items: int | None = None, timestamp_col: str = 'date', time_series_data_col: str = 'total_amount_sold', model_name: str = 'bqmlforecast.arima_plus_xreg_model', train_table_name: str = 'bqmlforecast.training_data', test_table_name: str = 'bqmlforecast.test_data', holiday_region: str = 'US', auto_arima: bool = True, adjust_step_changes: bool = True, clean_spikes_and_dips: bool = True) None[source]¶
        +iowa_forecast.ml_train.create_models_for_items(client: google.cloud.bigquery.Client, items_list: List[str], max_items: int | None = None, timestamp_col: str = 'date', time_series_data_col: str = 'total_amount_sold', model_name: str = 'bqmlforecast.arima_plus_xreg_model', train_table_name: str = 'bqmlforecast.training_data', test_table_name: str = 'bqmlforecast.test_data', holiday_region: str = 'US', auto_arima: bool = True, adjust_step_changes: bool = True, clean_spikes_and_dips: bool = True) None[source]¶

        Create ARIMA_PLUS_XREG models for a list of items.

        This function generates and executes a CREATE MODEL query for each item in the provided list. The models are created @@ -974,7 +974,7 @@

        Functions
        Parameters:
          -
        • client (bigquery.Client) – Instance of the BigQuery client used to execute queries.

        • +
        • client (google.cloud.bigquery.Client) – Instance of the BigQuery client used to execute queries.

        • items_list (List[str]) – List of item names for which models are to be created.

        • max_items (int or None, default None) – Maximum number of items to process. If None, all items are processed. See the ‘Notes’ section for more information.

        • @@ -1037,80 +1037,19 @@

          Functions

          iowa_forecast.plots¶

          -

          Time Series Plotting and Date Handling Module.

          -

          This module provides functions for handling date-related operations -on DataFrames and for visualizing time series data, including historical, -forecast, and actual values. It supports both Matplotlib and Plotly -as plotting engines, offering flexibility in visualization options.

          -
          -

          Functions¶

          -
            -
          • convert_to_datetime: convert a column in a DataFrame to datetime format.

          • -
          • filter_by_date: filter a DataFrame by a start date.

          • -
          • plot_historical_and_forecast: plot historical data with optional forecast and actual values.

          • -
          -
          -

          Notes

          -

          This module is designed to assist in the preparation and visualization -of time series data. The plot_historical_and_forecast function is -particularly useful for comparing historical data with forecasted -and actual values, with options to highlight peaks and add custom -plot elements using either Matplotlib or Plotly.

          -
          -
          -iowa_forecast.plots.convert_to_datetime(dataframe: DataFrame, col: str) DataFrame[source]¶
          -

          Convert a specified column in a DataFrame to datetime format.

          -

          This function takes a DataFrame and converts the specified column -to pandas’ datetime format, enabling datetime operations on that column.

          -
          -
          Parameters:
          -
            -
          • dataframe (pd.DataFrame) – The DataFrame containing the column to convert.

          • -
          • col (str) – The name of the column in the DataFrame to convert to datetime format.

          • -
          -
          -
          Returns:
          -

          pd.DataFrame – The original DataFrame with the specified column converted to datetime format.

          -
          -
          Return type:
          -

          DataFrame

          -
          -
          -
          -

          Notes

          -

          You can also chain this function using pandas.DataFrame.pipe:

          -
          df = pd.DataFrame({
          -    'date': ['2023-01-01', '2023-01-02'],
          -    'value': [10, 15]
          -}).pipe(convert_to_datetime, 'date')
          -
          -
          -
          -
          -

          Examples

          -

          Convert the ‘date’ column in a DataFrame to datetime format:

          -
          >>> df = pd.DataFrame({
          -...     'date': ['2023-01-01', '2023-01-02'],
          -...     'value': [10, 15]
          -... })
          ->>> df = convert_to_datetime(df,'date')
          ->>> dataframe['date'].dtype
          -dtype('<M8[ns]')
          -
          -
          -
          -
          +iowa_forecast.plots.convert_to_datetime(df, col)[source]¶ +
          -iowa_forecast.plots.filter_by_date(dataframe: DataFrame, col: str, start_date: str)[source]¶
          +iowa_forecast.plots.filter_by_date(df: DataFrame, col: str, start_date: str)[source]¶

          Filter a DataFrame by a start date.

          Parameters:
            -
          • dataframe (pd.DataFrame) – The DataFrame to filter.

          • +
          • df (pd.DataFrame) – The DataFrame to filter.

          • col (str) – The name of the datetime column to filter by.

          • start_date (str) – The start date to filter the DataFrame. If None, no filtering is done.

          @@ -1121,57 +1060,6 @@

          Functions

          -
          -
          -iowa_forecast.plots.plot_series(x_data, y_data, label: str, linestyle: str = '-', **kwargs) None[source]¶
          -

          Plot a series of data with optional markers.

          -

          This function plots a series of data using Matplotlib, with options -to customize the line style, add markers, and change the marker color.

          -
          -
          Parameters:
          -
            -
          • x_data (array-like) – The data for the x-axis.

          • -
          • y_data (array-like) – The data for the y-axis.

          • -
          • label (str) – The label for the plot legend.

          • -
          • linestyle (str, default "-") – The line style for the plot, e.g., ‘-’ for a solid line, ‘--’ for a dashed line.

          • -
          • **kwargs (dict, optional) –

            Additional keyword arguments for customizing the plot. -Available options: -- marker: str

            -
            -

            The marker style for scatter points.

            -
            -
              -
            • -
              color: str

              The color of the markers.

              -
              -
              -
            • -
            -

          • -
          -
          -
          Returns:
          -

          None

          -
          -
          Return type:
          -

          None

          -
          -
          -
          -

          Examples

          -

          Plot a series of data with default settings:

          -
          >>> x = [1, 2, 3, 4]
          ->>> y = [10, 15, 10, 20]
          ->>> plot_series(x, y, label="Sample Data")
          -
          -
          -

          Plot a series with markers:

          -
          >>> plot_series(x, y, label="Sample Data", marker="o", color="red")
          -
          -
          -
          -
          -
          iowa_forecast.plots.plot_historical_and_forecast(input_timeseries: pd.DataFrame, timestamp_col_name: str, data_col_name: str, forecast_output: pd.DataFrame | None = None, forecast_col_names: dict | None = None, actual: pd.DataFrame | None = None, actual_col_names: dict | None = None, title: str | None = None, plot_start_date: str | None = None, show_peaks: bool = True, engine: str = 'matplotlib', **plot_kwargs) None[source]¶
          @@ -1454,7 +1342,7 @@

          Functions
        • iowa_forecast.ml_eval
            -
          • Functions
          • +
          • Functions
          • evaluate_models()
          • get_data()
          • create_query()
          • @@ -1467,7 +1355,7 @@

            Functions
          • iowa_forecast.ml_train
              -
            • Functions
            • +
            • Functions
            • create_model_query()
            • execute_query_with_retries()
            • create_models_for_items()
            • @@ -1475,10 +1363,8 @@

              Functions
            • iowa_forecast.plots
            • diff --git a/modules.html b/modules.html index b01f91f..797aa29 100644 --- a/modules.html +++ b/modules.html @@ -274,7 +274,7 @@

              iowa_forecastiowa_forecast.ml_eval diff --git a/searchindex.js b/searchindex.js index 40e45f2..3cfd95c 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["api_reference/index", "api_reference/iowa_forecast/index", "api_reference/iowa_forecast/load_data", "api_reference/iowa_forecast/ml_eval", "api_reference/iowa_forecast/ml_train", "api_reference/iowa_forecast/plots", "api_reference/iowa_forecast/utils", "docs/html/_static/scripts/furo.js.LICENSE", "index", "installation", "iowa_forecast", "modules"], "filenames": ["api_reference/index.rst", "api_reference/iowa_forecast/index.rst", "api_reference/iowa_forecast/load_data.rst", "api_reference/iowa_forecast/ml_eval.rst", "api_reference/iowa_forecast/ml_train.rst", "api_reference/iowa_forecast/plots.rst", "api_reference/iowa_forecast/utils.rst", "docs/html/_static/scripts/furo.js.LICENSE.txt", "index.rst", "installation.rst", "iowa_forecast.rst", "modules.rst"], "titles": ["API Reference", "iowa_forecast", "load_data", "ml_eval", "ml_train", "plots", "utils", "<no title>", "Iowa Liquor Sales Forecast", "Installation", "API reference", "iowa_forecast"], "terms": {"thi": [0, 2, 3, 4, 5, 6, 9, 10], "page": [0, 8, 9, 10], "give": [0, 10], "an": [0, 2, 3, 4, 9, 10], "overview": [0, 10], "all": [0, 4, 5, 6, 9, 10], "public": [0, 2, 10], "panda": [0, 2, 3, 5, 6, 10], "object": [0, 2, 10], "function": [0, 6, 11], "method": [0, 10], "expos": [0, 10], "namespac": [0, 10], "ar": [0, 2, 3, 4, 6, 9, 10], "The": [0, 2, 3, 4, 5, 6, 9, 10], "packag": [0, 9, 10], "contain": [0, 2, 3, 4, 5, 6, 9, 10], "follow": [0, 2, 3, 9, 10], "modul": [0, 2, 3, 4, 5, 6, 11], "load_data": [0, 1, 11], "bigqueri": [0, 2, 3, 4, 6, 9, 10], "data": [0, 2, 3, 4, 5, 9, 10], "load": [0, 2, 6, 10], "featur": [0, 2, 4, 10], "engin": [0, 2, 5, 10], "ml_eval": [0, 1, 11], "model": [0, 2, 3, 4, 6, 9, 10], "evalu": [0, 3, 4, 10], "forecast": [0, 2, 3, 4, 5, 9, 10], "ml_train": [0, 1, 11], "train": [0, 2, 3, 4, 
9, 10], "execut": [0, 3, 4, 9, 10], "plot": [0, 1, 11], "time": [0, 2, 3, 4, 5, 10], "seri": [0, 2, 3, 4, 5, 10], "date": [0, 2, 3, 4, 5, 10], "handl": [0, 2, 4, 5, 10], "util": [0, 1, 2, 3, 9, 11], "gener": [0, 2, 3, 4, 5, 6, 9, 10], "provid": [2, 3, 4, 5, 6, 9, 10], "process": [2, 4, 10], "prepar": [2, 5, 10], "us": [2, 3, 4, 5, 6, 9, 10], "googl": [2, 3, 4, 10], "includ": [2, 3, 4, 5, 10], "creat": [2, 3, 4, 6, 9, 10], "dataset": [2, 4, 6, 9, 10], "offset": [2, 10], "item": [2, 3, 4, 6, 10], "filter": [2, 3, 5, 6, 10], "date_offset": [2, 10, 11], "dateoffset": [2, 10], "base": [2, 3, 4, 6, 10], "given": [2, 4, 6, 10], "frequenc": [2, 10], "valu": [2, 3, 4, 5, 6, 10], "get_item_names_filt": [2, 10, 11], "where": [2, 3, 4, 6, 9, 10], "claus": [2, 10], "compon": [2, 3, 6, 10], "from": [2, 3, 4, 6, 10], "column": [2, 3, 4, 5, 10], "item_nam": [2, 3, 4, 6, 10], "get_min_datapoints_filt": [2, 10, 11], "have": [2, 5, 9, 10], "least": [2, 10], "min_siz": [2, 10], "observ": [2, 10], "get_training_data": [2, 10, 11], "retriev": [2, 3, 6, 9, 10], "view": [2, 10], "get_year_weather_queri": [2, 10, 11], "sql": [2, 3, 4, 10], "queri": [2, 3, 4, 10], "weather": [2, 10], "specif": [2, 6, 10], "year": [2, 10], "state": [2, 10], "get_weather_queri": [2, 10, 11], "rang": [2, 3, 10], "create_forecast_features_queri": [2, 10, 11], "join": [2, 10], "tabl": [2, 3, 4, 6, 10], "create_future_data": [2, 10, 11], "futur": [2, 4, 10], "test": [2, 3, 4, 10], "create_future_feature_t": [2, 10, 11], "save": [2, 10], "result": [2, 3, 10], "multipl": [2, 3, 10], "i": [2, 3, 4, 5, 6, 9, 10], "design": [2, 3, 4, 5, 10], "work": [2, 3, 4, 10], "requir": [2, 3, 4, 5, 10], "valid": [2, 3, 4, 9, 10], "client": [2, 3, 4, 6, 10], "instanc": [2, 3, 4, 10], "focu": [2, 10], "variou": [2, 3, 10], "busi": [2, 3, 10], "context": [2, 3, 10], "iowa_forecast": [2, 3, 4, 5, 6, 8], "n": [2, 5, 10], "int": [2, 3, 4, 10], "freq": [2, 10], "str": [2, 3, 4, 5, 6, 10], "sourc": [2, 3, 4, 5, 6, 10], "paramet": [2, 3, 
4, 5, 6, 10], "number": [2, 3, 4, 10], "unit": [2, 10], "dai": [2, 3, 4, 10], "week": [2, 10], "month": [2, 10], "type": [2, 3, 4, 5, 6, 9, 10], "option": [2, 3, 4, 5, 6, 10], "return": [2, 3, 4, 5, 6, 10], "pd": [2, 3, 5, 6, 10], "A": [2, 3, 4, 6, 7, 10], "specifi": [2, 3, 4, 5, 6, 10], "rais": [2, 4, 5, 6, 10], "valueerror": [2, 5, 10], "If": [2, 3, 4, 5, 6, 9, 10], "one": [2, 3, 6, 9, 10], "items_list": [2, 3, 4, 10], "list": [2, 3, 4, 6, 9, 10], "name": [2, 3, 4, 5, 6, 10], "add": [2, 5, 9, 10], "can": [2, 3, 5, 9, 10], "print": [2, 6, 10], "five": [2, 10], "o": [2, 5, 10], "clock": [2, 10], "vodka": [2, 6, 10], "firebal": [2, 10], "cinnamon": [2, 10], "whiskei": [2, 10], "black": [2, 10], "velvet": [2, 10], "OR": [2, 10], "minimum": [2, 10], "table_nam": [2, 3, 6, 10], "bqmlforecast": [2, 3, 4, 6, 10], "training_data": [2, 3, 4, 10], "start_dat": [2, 5, 10], "none": [2, 3, 4, 5, 6, 10], "end_dat": [2, 3, 10], "min_datapoints_r": [2, 10], "float": [2, 3, 4, 10], "0": [2, 3, 4, 10], "75": [2, 10], "base_t": [2, 10], "iowa_liquor_sal": [2, 10], "sale": [2, 4, 9, 10], "datafram": [2, 3, 5, 6, 10], "construct": [2, 4, 6, 10], "condit": [2, 10], "default": [2, 3, 4, 5, 6, 10], "store": [2, 4, 9, 10], "start": [2, 5, 9, 10], "yyyi": [2, 10], "mm": [2, 10], "dd": [2, 10], "format": [2, 5, 10], "determin": [2, 10], "wai": [2, 10], "end": [2, 3, 10], "equal": [2, 4, 10], "todai": [2, 10], "": [2, 3, 6, 9, 10], "calcul": [2, 10], "fraction": [2, 10], "between": [2, 4, 10], "each": [2, 3, 4, 6, 10], "should": [2, 3, 4, 10], "point": [2, 5, 9, 10], "consid": [2, 10], "singl": [2, 10], "extract": [2, 6, 10], "ia": [2, 10], "which": [2, 3, 4, 5, 10], "code": [2, 4, 10], "string": [2, 3, 4, 10], "dataset_id": [2, 6, 10], "forecast_tables_pattern": [2, 10], "forecast_": [2, 10], "connect": [2, 6, 10], "servic": [2, 6, 9, 10], "id": [2, 6, 10], "locat": [2, 6, 10], "pattern": [2, 6, 10], "match": [2, 6, 10], "select": [2, 3, 4, 9, 10], "cast": [2, 10], "t1": [2, 10], 
"forecast_timestamp": [2, 10], "AS": [2, 10], "total_amount_sold": [2, 4, 10], "t2": [2, 10], "forecast_valu": [2, 10], "temp": [2, 10], "t3": [2, 10], "rainfal": [2, 10], "t4": [2, 10], "snowfal": [2, 10], "fw": [2, 10], "temperatur": [2, 10], "forecast_temp": [2, 10], "inner": [2, 10], "forecast_rainfal": [2, 10], "ON": [2, 10], "AND": [2, 10], "forecast_snowfal": [2, 10], "left": [2, 10], "future_weather_data": [2, 10], "comprehens": [2, 10], "train_table_nam": [2, 3, 4, 10], "test_table_nam": [2, 4, 10], "test_data": [2, 3, 4, 10], "forecast_table_nam": [2, 3, 10], "forecast_data": [2, 3, 10], "horizon": [2, 3, 4, 10], "7": [2, 3, 4, 10], "It": [2, 3, 5, 10], "inform": [2, 4, 6, 10], "lag": [2, 10], "model_nam": [2, 4, 10], "confidence_level": [2, 3, 4, 5, 10], "9": [2, 4, 9, 10], "confid": [2, 3, 4, 10], "level": [2, 3, 4, 10], "arima_model": [2, 4, 10], "table_base_nam": [2, 10], "set": [3, 5, 9, 10], "explain": [3, 10], "well": [3, 10], "aggreg": [3, 10], "across": [3, 10], "evaluate_model": [3, 10, 11], "arima_plus_xreg": [3, 4, 10], "perform": [3, 4, 10], "metric": [3, 4, 10], "get_data": [3, 10, 11], "create_queri": [3, 10, 11], "get_train_data": [3, 10, 11], "get_actual_data": [3, 10, 11], "actual": [3, 5, 9, 10], "get_predict": [3, 10, 11], "predict": [3, 10], "evaluate_predict": [3, 10, 11], "against": [3, 6, 10], "comparison": [3, 10], "multi_evaluate_predict": [3, 10, 11], "dictionari": [3, 5, 10], "explain_model": [3, 10, 11], "explan": [3, 10], "primarili": [3, 10], "intend": [3, 4, 10], "perform_aggreg": [3, 10], "bool": [3, 4, 5, 10], "true": [3, 4, 5, 10], "arima_plus_xreg_model": [3, 4, 10], "actual_table_nam": [3, 10], "period": [3, 10], "maximum": [3, 4, 10], "step": [3, 4, 9, 10], "ahead": [3, 10], "whether": [3, 4, 5, 10], "date_filt": [3, 10], "order_bi": [3, 10], "databas": [3, 10], "order": [3, 10], "For": [3, 9, 10], "exampl": 3, "you": [3, 4, 5, 9, 10], "want": [3, 5, 10], "sort": [3, 10], "8": [3, 10], "tupl": [3, 6, 10], "compar": 
[3, 5, 10], "two": [3, 10], "dict": [3, 5, 10], "kei": [3, 5, 9, 10], "sub": [3, 10], "train_df": [3, 10], "eval_df": [3, 10], "ml": [3, 10], "explain_forecast": [3, 10], "http": [3, 7, 9, 10], "cloud": [3, 4, 10], "com": [3, 7, 9, 10], "doc": [3, 10], "refer": [3, 8, 9, 11], "standard": [3, 10], "bigqueryml": [3, 10], "syntax": [3, 10], "manag": [4, 10], "retri": [4, 10], "create_model_queri": [4, 10, 11], "its": [4, 10], "associ": [4, 10], "execute_query_with_retri": [4, 10, 11], "logic": [4, 10], "case": [4, 6, 10], "failur": [4, 10], "create_models_for_item": [4, 10, 11], "train_arima_model": [4, 10, 11], "arima": [4, 6, 10], "correspond": [4, 10], "holidai": [4, 10], "effect": [4, 10], "chang": [4, 5, 10], "clean": [4, 10], "timestamp_col": [4, 10], "time_series_data_col": [4, 10], "holiday_region": [4, 10], "u": [4, 6, 10], "auto_arima": [4, 10], "adjust_step_chang": [4, 10], "clean_spikes_and_dip": [4, 10], "tailor": [4, 9, 10], "repres": [4, 10], "timestamp": [4, 5, 10], "region": [4, 10], "enabl": [4, 5, 9, 10], "adjust": [4, 10], "spike": [4, 10], "dip": [4, 10], "max_retri": [4, 10], "3": [4, 5, 9, 10], "fail": [4, 6, 10], "automat": [4, 10], "up": [4, 9, 10], "increas": [4, 10], "delai": [4, 10], "attempt": [4, 10], "except": [4, 6, 10], "linearli": [4, 10], "120": [4, 10], "second": [4, 10], "multipli": [4, 10], "current": [4, 9, 10], "my_dataset": [4, 6, 10], "my_tabl": [4, 6, 10], "max_item": [4, 10], "see": [4, 5, 10], "section": [4, 5, 10], "more": [4, 10], "Not": [4, 10], "account": [4, 9, 10], "bill": [4, 10], "re": [4, 10], "limit": [4, 10], "smaller": [4, 10], "than": [4, 6, 10], "4": [4, 5, 10], "run": [4, 10], "might": [4, 10], "incur": [4, 10], "charg": [4, 10], "model_metrics_table_nam": [4, 10], "arima_model_metr": [4, 10], "time_series_timestamp_col": [4, 10], "time_series_id_col": [4, 10], "These": [4, 9, 10], "liquor": [4, 9, 10], "identifi": [4, 10], "convert_to_datetim": [5, 10, 11], "df": [5, 10], "col": [5, 10], "filter_by_d": [5, 
10, 11], "datetim": [5, 10], "done": [5, 10], "origin": [5, 10], "appli": [5, 10], "plot_historical_and_forecast": [5, 10, 11], "input_timeseri": [5, 10], "timestamp_col_nam": [5, 10], "data_col_nam": [5, 10], "forecast_output": [5, 10], "forecast_col_nam": [5, 10], "actual_col_nam": [5, 10], "titl": [5, 10], "plot_start_d": [5, 10], "show_peak": [5, 10], "matplotlib": [5, 10], "plot_kwarg": [5, 10], "histor": [5, 10], "visual": [5, 10], "peak": [5, 10], "highlight": [5, 10], "support": [5, 10], "both": [5, 10], "plotli": [5, 10], "line": [5, 10], "differ": [5, 6, 10], "color": [5, 10], "map": [5, 10], "lower_bound": [5, 10], "upper_bound": [5, 10], "ad": [5, 10], "avail": [5, 10], "either": [5, 10], "addit": [5, 10], "detail": [5, 10], "keyword": [5, 10], "argument": [5, 10], "custom": [5, 10], "neither": [5, 10], "nor": [5, 10], "allow": [5, 10], "librari": [5, 10], "get": [5, 6, 10], "prettier": [5, 10], "howev": [5, 10], "instal": [5, 8, 10], "By": [5, 10], "also": [5, 10], "project": [5, 6, 10], "txt": [5, 9, 10], "file": [5, 9, 10], "2023": [5, 10], "01": [5, 10], "02": [5, 10], "10": [5, 10], "15": [5, 10], "sampl": [5, 9, 10], "normalize_item_nam": [6, 10, 11], "convert": [5, 6, 10], "lower": [6, 10], "replac": [6, 9, 10], "space": [6, 10], "underscor": [6, 10], "normal": [6, 10], "tito": [6, 10], "handmad": [6, 10], "titos_handmade_vodka": [6, 10], "uniqu": [6, 10], "split_table_name_info": [6, 10, 11], "ani": [6, 9, 10], "insid": [6, 9, 10], "thei": [6, 10], "my_project": [6, 10], "create_bigquery_table_from_panda": [6, 10, 11], "table_id": [6, 10], "if_exist": [6, 10], "append": [6, 10], "behavior": [6, 10], "when": [6, 10], "alreadi": [6, 9, 10], "exist": [6, 9, 10], "column1": [6, 10], "1": [5, 6, 7, 10], "2": [5, 6, 7, 10], "column2": [6, 10], "b": [6, 10], "create_dataset_if_not_found": [6, 10, 11], "project_id": [6, 10], "dataset_nam": [6, 10], "doe": [6, 10], "infer": [6, 10], "attibut": [6, 10], "other": [6, 10], "error": [6, 9, 10], 
"new_dataset": [6, 10], "check": [6, 10], "list_tables_with_pattern": [6, 10, 11], "table_pattern": [6, 10], "fulli": [6, 10], "qualifi": [6, 10], "them": [6, 10], "interact": [6, 10], "fnmatch": [6, 10], "ensur": [6, 9, 10], "compat": [6, 10], "sales_": [6, 10], "sales_2021": [6, 10], "sales_2022": [6, 10], "gumshoej": 7, "v5": 7, "patch": 7, "pradyunsg": 7, "simpl": 7, "framework": 7, "agnost": 7, "scrollspi": 7, "script": [7, 9], "2019": 7, "chri": 7, "ferdinandi": 7, "mit": 7, "licens": 7, "github": [7, 9], "cferdinandi": 7, "gumsho": 7, "api": [8, 9, 11], "index": 8, "search": 8, "here": 9, "rst": 9, "your": 9, "iowa": 9, "guid": 9, "instruct": 9, "how": 9, "befor": 9, "begin": 9, "www": [], "org": [], "download": 9, "releas": [], "390": [], "higher": 9, "sdk": 9, "gcloud": 9, "configur": 9, "container": 9, "environ": 9, "git": 9, "scm": [], "clone": 9, "repositori": 9, "sever": 9, "To": 9, "pip": 9, "bash": 9, "r": 9, "storag": 9, "go": 9, "consol": 9, "new": 9, "navig": 9, "authent": 9, "iam": 9, "admin": [], "serviceaccount": [], "In": 9, "click": 9, "ve": 9, "tab": 9, "choos": 9, "json": 9, "button": 9, "google_application_credenti": 9, "variabl": 9, "path": 9, "export": 9, "prefer": 9, "consist": 9, "build": 9, "imag": 9, "root": 9, "directori": 9, "t": 9, "rm": 9, "e": [5, 9, 10], "v": 9, "pwd": 9, "app": 9, "credenti": 9, "mount": 9, "onc": 9, "haven": 9, "yourusernam": [], "cd": 9, "pipelin": 9, "train_model_and_forecast_sal": 9, "py": 9, "make": 9, "sure": 9, "correctli": 9, "encount": 9, "issu": 9, "dure": 9, "some": 9, "common": 9, "solut": 9, "miss": 9, "via": 9, "verifi": 9, "system": 9, "ha": 9, "enough": 9, "resourc": 9, "alloc": 9, "further": 9, "assist": [5, 9, 10], "document": 9, "html": [], "content": 11, "relat": [5, 10], "oper": [5, 10], "offer": [5, 10], "flexibl": [5, 10], "particularli": [5, 10], "element": [5, 10], "take": [5, 10], "chain": [5, 10], "pipe": [5, 10], "dtype": [5, 10], "m8": [5, 10], "plot_seri": [5, 10, 11], "x_data": 
[5, 10], "y_data": [5, 10], "label": [5, 10], "linestyl": [5, 10], "kwarg": [5, 10], "marker": [5, 10], "style": [5, 10], "arrai": [5, 10], "like": [5, 10], "x": [5, 10], "axi": [5, 10], "y": [5, 10], "legend": [5, 10], "g": [5, 10], "solid": [5, 10], "dash": [5, 10], "scatter": [5, 10], "20": [5, 10], "red": [5, 10], "erik": 9, "ingwersen": 9, "ei": 9}, "objects": {"": [[10, 0, 0, "-", "iowa_forecast"]], "iowa_forecast": [[10, 0, 0, "-", "load_data"], [10, 0, 0, "-", "ml_eval"], [10, 0, 0, "-", "ml_train"], [10, 0, 0, "-", "plots"], [10, 0, 0, "-", "utils"]], "iowa_forecast.load_data": [[10, 1, 1, "", "create_forecast_features_query"], [10, 1, 1, "", "create_future_data"], [10, 1, 1, "", "create_future_feature_table"], [10, 1, 1, "", "create_future_feature_tables"], [10, 1, 1, "", "date_offset"], [10, 1, 1, "", "get_item_names_filter"], [10, 1, 1, "", "get_min_datapoints_filter"], [10, 1, 1, "", "get_training_data"], [10, 1, 1, "", "get_weather_query"], [10, 1, 1, "", "get_year_weather_query"]], "iowa_forecast.ml_eval": [[10, 1, 1, "", "create_query"], [10, 1, 1, "", "evaluate_models"], [10, 1, 1, "", "evaluate_predictions"], [10, 1, 1, "", "explain_model"], [10, 1, 1, "", "get_actual_data"], [10, 1, 1, "", "get_data"], [10, 1, 1, "", "get_predictions"], [10, 1, 1, "", "get_train_data"], [10, 1, 1, "", "multi_evaluate_predictions"]], "iowa_forecast.ml_train": [[10, 1, 1, "", "create_model_query"], [10, 1, 1, "", "create_models_for_items"], [10, 1, 1, "", "execute_query_with_retries"], [10, 1, 1, "", "train_arima_models"]], "iowa_forecast.plots": [[10, 1, 1, "", "convert_to_datetime"], [10, 1, 1, "", "filter_by_date"], [10, 1, 1, "", "plot_historical_and_forecast"], [10, 1, 1, "", "plot_series"]], "iowa_forecast.utils": [[10, 1, 1, "", "create_bigquery_table_from_pandas"], [10, 1, 1, "", "create_dataset_if_not_found"], [10, 1, 1, "", "list_tables_with_pattern"], [10, 1, 1, "", "normalize_item_name"], [10, 1, 1, "", "split_table_name_info"]]}, "objtypes": {"0": 
"py:module", "1": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "function", "Python function"]}, "titleterms": {"api": [0, 10], "refer": [0, 10], "iowa_forecast": [0, 1, 10, 11], "load_data": [2, 10], "function": [2, 3, 4, 5, 10], "note": [2, 3, 4, 5, 6, 10], "exampl": [2, 4, 5, 6, 10], "ml_eval": [3, 10], "ml_train": [4, 10], "plot": [5, 10], "util": [6, 10], "iowa": 8, "liquor": 8, "sale": 8, "forecast": 8, "modul": [8, 10], "content": [8, 10], "indic": 8, "tabl": 8, "instal": 9, "requir": 9, "python": 9, "depend": 9, "googl": 9, "cloud": 9, "setup": 9, "docker": 9, "option": 9, "run": 9, "project": 9, "troubleshoot": 9}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx.ext.intersphinx": 1, "sphinx": 58}, "alltitles": {"API Reference": [[0, "api-reference"]], "iowa_forecast": [[0, "iowa-forecast"], [1, "iowa-forecast"], [11, "iowa-forecast"]], "load_data": [[2, "module-iowa_forecast.load_data"]], "Functions": [[2, "functions"], [3, "functions"], [4, "functions"], [5, "functions"], [10, "functions"], [10, "id1"], [10, "id2"], [10, "id3"]], "Notes": [[2, null], [2, null], [3, null], [4, null], [4, null], [4, null], [5, null], [5, null], [5, null], [6, null], [6, null], [6, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null]], "Examples": [[2, null], [2, null], [4, null], [5, null], [5, null], [5, null], [6, null], [6, null], [6, null], [6, null], [6, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null], [10, null]], "ml_eval": [[3, "module-iowa_forecast.ml_eval"]], "ml_train": [[4, 
"module-iowa_forecast.ml_train"]], "plots": [[5, "module-iowa_forecast.plots"]], "utils": [[6, "module-iowa_forecast.utils"]], "Iowa Liquor Sales Forecast": [[8, "iowa-liquor-sales-forecast"]], "Modules": [[8, "modules"]], "Contents:": [[8, null]], "Indices and tables": [[8, "indices-and-tables"]], "Installation": [[9, "installation"]], "Requirements": [[9, "requirements"]], "Python Dependencies": [[9, "python-dependencies"]], "Google Cloud Setup": [[9, "google-cloud-setup"]], "Docker Setup (Optional)": [[9, "docker-setup-optional"]], "Running the Project": [[9, "running-the-project"]], "Troubleshooting": [[9, "troubleshooting"]], "API reference": [[10, "api-reference"]], "iowa_forecast.load_data": [[10, "module-iowa_forecast.load_data"]], "iowa_forecast.ml_eval": [[10, "module-iowa_forecast.ml_eval"]], "iowa_forecast.ml_train": [[10, "module-iowa_forecast.ml_train"]], "iowa_forecast.plots": [[10, "module-iowa_forecast.plots"]], "iowa_forecast.utils": [[10, "module-iowa_forecast.utils"]], "Module contents": [[10, "module-iowa_forecast"]]}, "indexentries": {"create_forecast_features_query() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.create_forecast_features_query"], [10, "iowa_forecast.load_data.create_forecast_features_query"]], "create_future_data() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.create_future_data"], [10, "iowa_forecast.load_data.create_future_data"]], "create_future_feature_table() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.create_future_feature_table"], [10, "iowa_forecast.load_data.create_future_feature_table"]], "create_future_feature_tables() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.create_future_feature_tables"], [10, "iowa_forecast.load_data.create_future_feature_tables"]], "date_offset() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.date_offset"], [10, "iowa_forecast.load_data.date_offset"]], "get_item_names_filter() (in 
module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.get_item_names_filter"], [10, "iowa_forecast.load_data.get_item_names_filter"]], "get_min_datapoints_filter() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.get_min_datapoints_filter"], [10, "iowa_forecast.load_data.get_min_datapoints_filter"]], "get_training_data() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.get_training_data"], [10, "iowa_forecast.load_data.get_training_data"]], "get_weather_query() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.get_weather_query"], [10, "iowa_forecast.load_data.get_weather_query"]], "get_year_weather_query() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.get_year_weather_query"], [10, "iowa_forecast.load_data.get_year_weather_query"]], "iowa_forecast.load_data": [[2, "module-iowa_forecast.load_data"], [10, "module-iowa_forecast.load_data"]], "module": [[2, "module-iowa_forecast.load_data"], [3, "module-iowa_forecast.ml_eval"], [4, "module-iowa_forecast.ml_train"], [5, "module-iowa_forecast.plots"], [6, "module-iowa_forecast.utils"], [10, "module-iowa_forecast"], [10, "module-iowa_forecast.load_data"], [10, "module-iowa_forecast.ml_eval"], [10, "module-iowa_forecast.ml_train"], [10, "module-iowa_forecast.plots"], [10, "module-iowa_forecast.utils"]], "create_query() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.create_query"], [10, "iowa_forecast.ml_eval.create_query"]], "evaluate_models() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.evaluate_models"], [10, "iowa_forecast.ml_eval.evaluate_models"]], "evaluate_predictions() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.evaluate_predictions"], [10, "iowa_forecast.ml_eval.evaluate_predictions"]], "explain_model() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.explain_model"], [10, "iowa_forecast.ml_eval.explain_model"]], "get_actual_data() (in module 
iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.get_actual_data"], [10, "iowa_forecast.ml_eval.get_actual_data"]], "get_data() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.get_data"], [10, "iowa_forecast.ml_eval.get_data"]], "get_predictions() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.get_predictions"], [10, "iowa_forecast.ml_eval.get_predictions"]], "get_train_data() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.get_train_data"], [10, "iowa_forecast.ml_eval.get_train_data"]], "iowa_forecast.ml_eval": [[3, "module-iowa_forecast.ml_eval"], [10, "module-iowa_forecast.ml_eval"]], "multi_evaluate_predictions() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.multi_evaluate_predictions"], [10, "iowa_forecast.ml_eval.multi_evaluate_predictions"]], "create_model_query() (in module iowa_forecast.ml_train)": [[4, "iowa_forecast.ml_train.create_model_query"], [10, "iowa_forecast.ml_train.create_model_query"]], "create_models_for_items() (in module iowa_forecast.ml_train)": [[4, "iowa_forecast.ml_train.create_models_for_items"], [10, "iowa_forecast.ml_train.create_models_for_items"]], "execute_query_with_retries() (in module iowa_forecast.ml_train)": [[4, "iowa_forecast.ml_train.execute_query_with_retries"], [10, "iowa_forecast.ml_train.execute_query_with_retries"]], "iowa_forecast.ml_train": [[4, "module-iowa_forecast.ml_train"], [10, "module-iowa_forecast.ml_train"]], "train_arima_models() (in module iowa_forecast.ml_train)": [[4, "iowa_forecast.ml_train.train_arima_models"], [10, "iowa_forecast.ml_train.train_arima_models"]], "convert_to_datetime() (in module iowa_forecast.plots)": [[5, "iowa_forecast.plots.convert_to_datetime"], [10, "iowa_forecast.plots.convert_to_datetime"]], "filter_by_date() (in module iowa_forecast.plots)": [[5, "iowa_forecast.plots.filter_by_date"], [10, "iowa_forecast.plots.filter_by_date"]], "iowa_forecast.plots": [[5, "module-iowa_forecast.plots"], [10, 
"module-iowa_forecast.plots"]], "plot_historical_and_forecast() (in module iowa_forecast.plots)": [[5, "iowa_forecast.plots.plot_historical_and_forecast"], [10, "iowa_forecast.plots.plot_historical_and_forecast"]], "plot_series() (in module iowa_forecast.plots)": [[5, "iowa_forecast.plots.plot_series"], [10, "iowa_forecast.plots.plot_series"]], "create_bigquery_table_from_pandas() (in module iowa_forecast.utils)": [[6, "iowa_forecast.utils.create_bigquery_table_from_pandas"], [10, "iowa_forecast.utils.create_bigquery_table_from_pandas"]], "create_dataset_if_not_found() (in module iowa_forecast.utils)": [[6, "iowa_forecast.utils.create_dataset_if_not_found"], [10, "iowa_forecast.utils.create_dataset_if_not_found"]], "iowa_forecast.utils": [[6, "module-iowa_forecast.utils"], [10, "module-iowa_forecast.utils"]], "list_tables_with_pattern() (in module iowa_forecast.utils)": [[6, "iowa_forecast.utils.list_tables_with_pattern"], [10, "iowa_forecast.utils.list_tables_with_pattern"]], "normalize_item_name() (in module iowa_forecast.utils)": [[6, "iowa_forecast.utils.normalize_item_name"], [10, "iowa_forecast.utils.normalize_item_name"]], "split_table_name_info() (in module iowa_forecast.utils)": [[6, "iowa_forecast.utils.split_table_name_info"], [10, "iowa_forecast.utils.split_table_name_info"]], "iowa_forecast": [[10, "module-iowa_forecast"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["api_reference/index", "api_reference/iowa_forecast/index", "api_reference/iowa_forecast/load_data", "api_reference/iowa_forecast/ml_eval", "api_reference/iowa_forecast/ml_train", "api_reference/iowa_forecast/models_configs", "api_reference/iowa_forecast/plots", "api_reference/iowa_forecast/utils", "docs/html/_static/scripts/furo.js.LICENSE", "index", "installation"], "filenames": ["api_reference/index.rst", "api_reference/iowa_forecast/index.rst", "api_reference/iowa_forecast/load_data.rst", "api_reference/iowa_forecast/ml_eval.rst", 
"api_reference/iowa_forecast/ml_train.rst", "api_reference/iowa_forecast/models_configs.rst", "api_reference/iowa_forecast/plots.rst", "api_reference/iowa_forecast/utils.rst", "docs/html/_static/scripts/furo.js.LICENSE.txt", "index.rst", "installation.rst"], "titles": ["API Reference", "iowa_forecast", "load_data", "ml_eval", "ml_train", "models_configs", "plots", "utils", "<no title>", "Iowa Liquor Sales Forecast", "Installation"], "terms": {"thi": [0, 2, 3, 4, 5, 6, 7, 10], "page": [0, 9, 10], "give": 0, "an": [0, 2, 3, 4, 5, 10], "overview": 0, "all": [0, 4, 5, 6, 7, 10], "public": [0, 2], "panda": [0, 2, 3, 6, 7], "object": [0, 2, 7], "function": [0, 5, 7], "method": 0, "expos": 0, "namespac": 0, "ar": [0, 2, 3, 4, 5, 7, 10], "The": [0, 2, 3, 4, 5, 6, 7, 10], "packag": [0, 10], "contain": [0, 2, 3, 4, 5, 6, 7, 10], "follow": [0, 2, 3, 5, 10], "modul": [0, 2, 3, 4, 5, 6, 7], "load_data": [0, 1], "bigqueri": [0, 2, 3, 4, 7, 10], "data": [0, 2, 3, 4, 6, 10], "load": [0, 2, 7], "featur": [0, 2, 4], "engin": [0, 2, 6], "ml_eval": [0, 1], "model": [0, 2, 3, 4, 5, 7, 10], "evalu": [0, 3, 4], "forecast": [0, 2, 3, 4, 6, 10], "ml_train": [0, 1], "train": [0, 2, 3, 4, 5, 10], "execut": [0, 3, 4, 10], "plot": [0, 1], "time": [0, 2, 3, 4, 6, 7], "seri": [0, 2, 3, 4, 6], "date": [0, 2, 3, 4, 6], "handl": [0, 2, 4, 5, 6], "util": [0, 1, 2, 3, 10], "gener": [0, 2, 3, 4, 6, 7, 10], "provid": [2, 3, 4, 5, 6, 7, 10], "process": [2, 4, 5], "prepar": [2, 6], "us": [2, 3, 4, 5, 6, 7, 10], "googl": [2, 3, 4], "includ": [2, 3, 4, 5, 6], "creat": [2, 3, 4, 7, 10], "dataset": [2, 4, 7, 10], "offset": [2, 7], "item": [2, 3, 4, 7], "filter": [2, 3, 6, 7], "date_offset": [2, 7], "dateoffset": [2, 7], "base": [2, 3, 4, 5, 7], "given": [2, 4, 7], "frequenc": [2, 7], "valu": [2, 3, 4, 5, 6, 7], "get_item_names_filt": 2, "where": [2, 3, 4, 5, 7, 10], "claus": 2, "compon": [2, 3, 4, 7], "from": [2, 3, 4, 5, 7], "column": [2, 3, 4, 6], "item_nam": [2, 3, 4, 7], "get_min_datapoints_filt": 2, 
"have": [2, 6, 10], "least": 2, "min_siz": 2, "observ": 2, "get_training_data": 2, "retriev": [2, 3, 7, 10], "view": 2, "get_year_weather_queri": 2, "sql": [2, 3, 4], "queri": [2, 3, 4], "weather": 2, "specif": [2, 5, 7], "year": [2, 7], "state": 2, "get_weather_queri": 2, "rang": [2, 3, 5], "create_forecast_features_queri": 2, "join": 2, "tabl": [2, 3, 4, 7], "create_future_data": 2, "futur": [2, 4], "test": [2, 3, 4], "create_future_feature_t": 2, "save": 2, "result": [2, 3], "multipl": [2, 3], "i": [2, 3, 4, 6, 7, 10], "design": [2, 3, 4, 6], "work": [2, 3, 4], "requir": [2, 3, 4, 5, 6], "valid": [0, 2, 3, 4, 5, 7, 10], "client": [2, 3, 4, 7], "instanc": [2, 3, 4], "focu": 2, "variou": [2, 3, 5], "busi": [2, 3], "context": [2, 3], "iowa_forecast": [2, 3, 4, 5, 6, 7, 9], "n": [6, 7], "int": [2, 3, 4, 7], "freq": [2, 7], "str": [2, 3, 4, 5, 6, 7], "sourc": [2, 3, 4, 5, 6, 7], "paramet": [0, 2, 3, 4, 5, 6, 7], "number": [2, 3, 4, 7], "unit": [], "dai": [2, 3, 4, 7], "week": 7, "month": 7, "type": [2, 3, 4, 5, 6, 7, 10], "option": [2, 3, 4, 6, 7], "return": [2, 3, 4, 5, 6, 7], "pd": [2, 3, 6, 7], "A": [2, 3, 4, 5, 7, 8], "specifi": [2, 3, 4, 5, 6, 7], "rais": [2, 4, 6, 7], "valueerror": [2, 6, 7], "If": [2, 3, 4, 6, 7, 10], "one": [2, 3, 7, 10], "items_list": [2, 3, 4], "list": [2, 3, 4, 5, 7, 10], "name": [2, 3, 4, 5, 6, 7], "add": [2, 6, 10], "can": [2, 3, 5, 6, 10], "print": [2, 5, 7], "five": 2, "o": [2, 6], "clock": 2, "vodka": [2, 7], "firebal": 2, "cinnamon": 2, "whiskei": 2, "black": 2, "velvet": 2, "OR": 2, "minimum": 2, "table_nam": [2, 3, 7], "bqmlforecast": [2, 3, 4, 7], "training_data": [2, 3, 4], "start_dat": [2, 6], "none": [2, 3, 4, 6, 7], "end_dat": [2, 3], "min_datapoints_r": 2, "float": [2, 3, 4], "0": [2, 3, 4, 7], "75": 2, "base_t": 2, "iowa_liquor_sal": 2, "sale": [2, 4, 10], "datafram": [2, 3, 6, 7], "construct": [2, 4, 7], "condit": 2, "default": [2, 3, 4, 5, 6, 7], "store": [2, 4, 10], "start": [2, 6, 10], "yyyi": 2, "mm": 2, "dd": 2, 
"format": [2, 6], "determin": 2, "wai": 2, "end": [2, 3], "equal": [2, 4], "todai": 2, "": [2, 3, 7, 10], "calcul": 2, "fraction": 2, "between": [2, 4], "each": [2, 3, 4, 7], "should": [2, 3, 4, 5, 7], "point": [2, 6, 10], "consid": [2, 4], "singl": 2, "extract": [2, 7], "ia": 2, "which": [2, 3, 4, 5, 6], "code": [2, 4], "string": [2, 3, 4, 7], "dataset_id": [2, 7], "forecast_tables_pattern": 2, "forecast_": 2, "connect": [2, 7], "servic": [2, 7, 10], "id": [2, 7], "locat": [2, 7], "pattern": [2, 5, 7], "match": [2, 7], "select": [2, 3, 4, 10], "cast": 2, "t1": 2, "forecast_timestamp": 2, "AS": 2, "total_amount_sold": [2, 4], "t2": 2, "forecast_valu": 2, "temp": 2, "t3": 2, "rainfal": 2, "t4": 2, "snowfal": 2, "fw": 2, "temperatur": 2, "forecast_temp": 2, "inner": 2, "forecast_rainfal": 2, "ON": 2, "AND": 2, "forecast_snowfal": 2, "left": 2, "future_weather_data": 2, "comprehens": 2, "train_table_nam": [2, 3, 4], "test_table_nam": [2, 4], "test_data": [2, 3, 4], "forecast_table_nam": [2, 3], "forecast_data": [2, 3], "horizon": [2, 3, 4], "7": [2, 3, 4], "It": [2, 3, 5, 6], "inform": [2, 4, 7], "lag": 2, "model_nam": [2, 4], "confidence_level": [2, 3, 4, 6], "9": [2, 4, 10], "confid": [2, 3, 4], "level": [2, 3, 4], "arima_model": [2, 4], "table_base_nam": 2, "set": [3, 4, 5, 6, 10], "explain": 3, "well": 3, "aggreg": 3, "across": 3, "evaluate_model": 3, "arima_plus_xreg": [3, 4, 5], "perform": [3, 4], "metric": [3, 4], "get_data": 3, "create_queri": 3, "get_train_data": 3, "get_actual_data": 3, "actual": [3, 6, 10], "get_predict": 3, "predict": 3, "evaluate_predict": 3, "against": [3, 7], "comparison": 3, "multi_evaluate_predict": 3, "dictionari": [3, 5, 6, 7], "explain_model": 3, "explan": 3, "primarili": 3, "intend": [3, 4, 5], "perform_aggreg": 3, "bool": [3, 4, 6], "true": [3, 4, 5, 6], "arima_plus_xreg_model": [3, 4], "actual_table_nam": 3, "period": 3, "maximum": [3, 4], "step": [3, 4, 10], "ahead": 3, "whether": [3, 4, 6], "date_filt": 3, "order_bi": 3, 
"databas": 3, "order": 3, "For": [3, 10], "exampl": 3, "you": [3, 4, 6, 10], "want": [3, 6], "sort": 3, "8": 3, "tupl": [3, 5, 7], "compar": [3, 6], "two": [3, 7], "dict": [3, 5, 6, 7], "kei": [3, 5, 6, 7, 10], "sub": 3, "train_df": 3, "eval_df": 3, "ml": 3, "explain_forecast": 3, "http": [3, 8, 10], "cloud": [3, 4], "com": [3, 8, 10], "doc": 3, "refer": [3, 9, 10], "standard": [3, 5], "bigqueryml": 3, "syntax": 3, "manag": [0, 4, 5], "retri": 4, "create_model_queri": 4, "its": [4, 7], "associ": 4, "execute_query_with_retri": 4, "logic": 4, "case": [4, 7], "failur": 4, "create_models_for_item": 4, "train_arima_model": 4, "arima": [4, 5, 7], "correspond": [4, 7], "holidai": 4, "effect": 4, "chang": [4, 6], "clean": 4, "timestamp_col": 4, "time_series_data_col": 4, "holiday_region": 4, "u": [4, 7], "auto_arima": 4, "adjust_step_chang": 4, "clean_spikes_and_dip": 4, "tailor": [4, 10], "repres": 4, "timestamp": [4, 6], "region": 4, "enabl": [4, 6, 10], "adjust": 4, "spike": 4, "dip": 4, "max_retri": 4, "3": [4, 6, 10], "fail": [4, 7], "automat": 4, "up": [4, 10], "increas": 4, "delai": 4, "attempt": 4, "except": [4, 7], "linearli": 4, "120": 4, "second": 4, "multipli": 4, "current": [4, 10], "my_dataset": [4, 7], "my_tabl": [4, 7], "max_item": 4, "see": [4, 6], "section": [4, 6], "more": 4, "Not": 4, "account": [4, 10], "bill": 4, "re": 4, "limit": 4, "smaller": 4, "than": [4, 7], "4": [4, 6], "run": 4, "might": 4, "incur": 4, "charg": 4, "model_metrics_table_nam": 4, "arima_model_metr": 4, "time_series_timestamp_col": 4, "time_series_id_col": 4, "These": [4, 5, 10], "liquor": [4, 10], "identifi": 4, "convert_to_datetim": 6, "df": 6, "col": 6, "filter_by_d": 6, "datetim": 6, "done": 6, "origin": 6, "appli": 6, "plot_historical_and_forecast": 6, "input_timeseri": 6, "timestamp_col_nam": 6, "data_col_nam": 6, "forecast_output": 6, "forecast_col_nam": 6, "actual_col_nam": 6, "titl": 6, "plot_start_d": 6, "show_peak": 6, "matplotlib": 6, "plot_kwarg": 6, "histor": 6, 
"visual": 6, "peak": 6, "highlight": 6, "support": 6, "both": 6, "plotli": 6, "line": 6, "differ": [6, 7], "color": 6, "map": 6, "lower_bound": 6, "upper_bound": 6, "ad": 6, "avail": [4, 6], "either": 6, "addit": [4, 5, 6], "detail": 6, "keyword": [4, 6], "argument": [4, 6, 7], "custom": 6, "neither": 6, "nor": 6, "allow": [4, 6], "librari": 6, "get": [6, 7], "prettier": 6, "howev": 6, "instal": [6, 9], "By": [5, 6], "also": 6, "project": [6, 7], "txt": [6, 10], "file": [6, 10], "2023": 6, "01": 6, "02": 6, "10": 6, "15": 6, "sampl": [6, 10], "normalize_item_nam": 7, "convert": [6, 7], "lower": 7, "replac": [7, 10], "space": 7, "underscor": 7, "normal": 7, "tito": 7, "handmad": 7, "titos_handmade_vodka": 7, "uniqu": 7, "split_table_name_info": 7, "ani": [4, 5, 7, 10], "insid": [7, 10], "thei": [5, 7], "my_project": 7, "create_bigquery_table_from_panda": 7, "table_id": 7, "if_exist": 7, "append": 7, "behavior": 7, "when": 7, "alreadi": [7, 10], "exist": [7, 10], "column1": 7, "1": [6, 7, 8], "2": [6, 7, 8], "column2": 7, "b": 7, "create_dataset_if_not_found": 7, "project_id": 7, "dataset_nam": 7, "doe": 7, "infer": 7, "attibut": 7, "other": [5, 7], "error": [5, 7, 10], "new_dataset": 7, "check": [5, 7], "list_tables_with_pattern": 7, "table_pattern": 7, "fulli": 7, "qualifi": 7, "them": 7, "interact": 7, "fnmatch": 7, "ensur": [5, 7, 10], "compat": 7, "sales_": 7, "sales_2021": 7, "sales_2022": 7, "gumshoej": 8, "v5": 8, "patch": 8, "pradyunsg": 8, "simpl": 8, "framework": 8, "agnost": 8, "scrollspi": 8, "script": [8, 10], "2019": 8, "chri": 8, "ferdinandi": 8, "mit": 8, "licens": 8, "github": [8, 10], "cferdinandi": 8, "gumsho": 8, "api": [9, 10], "index": 9, "search": 9, "here": 10, "rst": 10, "your": 10, "iowa": 10, "guid": 10, "instruct": 10, "how": 10, "befor": [5, 10], "begin": 10, "www": [], "org": [], "download": 10, "releas": [], "390": [], "higher": 10, "sdk": 10, "gcloud": 10, "configur": [0, 5, 10], "container": 10, "environ": 10, "git": 10, "scm": [], 
"clone": 10, "repositori": 10, "sever": 10, "To": 10, "pip": 10, "bash": 10, "r": 10, "storag": 10, "go": 10, "consol": 10, "new": 10, "navig": 10, "authent": 10, "iam": 10, "admin": [], "serviceaccount": [], "In": 10, "click": 10, "ve": 10, "tab": 10, "choos": 10, "json": 10, "button": 10, "google_application_credenti": 10, "variabl": [5, 10], "path": 10, "export": 10, "prefer": 10, "consist": [5, 10], "build": 10, "imag": 10, "root": 10, "directori": 10, "t": [4, 10], "rm": 10, "e": [6, 7, 10], "v": 10, "pwd": 10, "app": 10, "credenti": 10, "mount": 10, "onc": 10, "haven": 10, "yourusernam": [], "cd": 10, "pipelin": 10, "train_model_and_forecast_sal": 10, "py": 10, "make": 10, "sure": 10, "correctli": [5, 10], "encount": 10, "issu": 10, "dure": [4, 10], "some": [5, 10], "common": [5, 10], "solut": 10, "miss": 10, "via": [5, 10], "verifi": 10, "system": 10, "ha": 10, "enough": 10, "resourc": 10, "alloc": 10, "further": 10, "assist": [6, 10], "document": 10, "html": [], "content": [], "models_config": [0, 1], "class": 0, "machin": 5, "learn": 5, "focus": 5, "deriv": 5, "abstractbasemodelconfig": 5, "abc": 5, "abstract": 5, "basemodelconfig": 5, "subclass": 5, "defin": 5, "supported_paramet": 5, "expect": 5, "choic": 5, "arimaconfig": 5, "inherit": 5, "arima_plu": 5, "adher": 5, "arima_plus_xreg_config": 5, "extend": 5, "exogen": 5, "xreg_featur": 5, "setup": 5, "pass": 5, "leverag": 5, "develop": 5, "fall": 5, "within": 5, "reduc": 5, "likelihood": 5, "runtim": 5, "config": 5, "model_typ": 5, "xreg_config": 5, "feature1": 5, "feature2": 5, "non_negative_forecast": 5, "attribut": 5, "properti": 5, "must": 5, "implement": 5, "kwarg": [4, 5, 6], "unpack": 5, "restrict": 5, "erik": 10, "ingwersen": 10, "ei": 10, "include_test_on_model_train": 4, "union": 4, "being": 4, "include_test_on_arima_model_train": 4, "uni": 4, "variat": 4, "onli": 4, "shouldn": 4, "therefor": 4, "do": 4, "so": 4, "caus": 4, "turn": 4, "use_test_data_on_train": 4, "relat": 6, "oper": 6, "offer": 
6, "flexibl": 6, "particularli": 6, "element": 6, "take": 6, "chain": 6, "pipe": 6, "dtype": 6, "m8": 6, "plot_seri": 6, "x_data": 6, "y_data": 6, "label": 6, "linestyl": 6, "marker": 6, "style": 6, "arrai": 6, "like": 6, "x": 6, "axi": 6, "y": 6, "legend": 6, "g": [6, 7], "solid": 6, "dash": 6, "scatter": 6, "20": 6, "red": 6, "parse_combined_str": 7, "combin": 7, "pars": 7, "2y3m2w1d": 7, "invalid": 7, "create_date_offset_from_part": 7, "individu": 7, "arg": 7, "ignor": 7}, "objects": {"iowa_forecast": [[2, 0, 0, "-", "load_data"], [3, 0, 0, "-", "ml_eval"], [4, 0, 0, "-", "ml_train"], [5, 0, 0, "-", "models_configs"], [6, 0, 0, "-", "plots"], [7, 0, 0, "-", "utils"]], "iowa_forecast.load_data": [[2, 1, 1, "", "create_forecast_features_query"], [2, 1, 1, "", "create_future_data"], [2, 1, 1, "", "create_future_feature_table"], [2, 1, 1, "", "create_future_feature_tables"], [2, 1, 1, "", "get_item_names_filter"], [2, 1, 1, "", "get_min_datapoints_filter"], [2, 1, 1, "", "get_training_data"], [2, 1, 1, "", "get_weather_query"], [2, 1, 1, "", "get_year_weather_query"]], "iowa_forecast.ml_eval": [[3, 1, 1, "", "create_query"], [3, 1, 1, "", "evaluate_models"], [3, 1, 1, "", "evaluate_predictions"], [3, 1, 1, "", "explain_model"], [3, 1, 1, "", "get_actual_data"], [3, 1, 1, "", "get_data"], [3, 1, 1, "", "get_predictions"], [3, 1, 1, "", "get_train_data"], [3, 1, 1, "", "multi_evaluate_predictions"]], "iowa_forecast.ml_train": [[4, 1, 1, "", "create_model_query"], [4, 1, 1, "", "create_models_for_items"], [4, 1, 1, "", "execute_query_with_retries"], [4, 1, 1, "", "include_test_on_arima_model_train"], [4, 1, 1, "", "include_test_on_model_train"], [4, 1, 1, "", "train_arima_models"]], "iowa_forecast.models_configs": [[5, 2, 1, "", "ARIMAConfig"], [5, 2, 1, "", "ARIMA_PLUS_XREG_Config"], [5, 2, 1, "", "AbstractBaseModelConfig"], [5, 2, 1, "", "BaseModelConfig"]], "iowa_forecast.models_configs.ARIMAConfig": [[5, 3, 1, "", "SUPPORTED_PARAMETERS"]], 
"iowa_forecast.models_configs.ARIMA_PLUS_XREG_Config": [[5, 3, 1, "", "SUPPORTED_PARAMETERS"]], "iowa_forecast.models_configs.AbstractBaseModelConfig": [[5, 3, 1, "", "SUPPORTED_PARAMETERS"]], "iowa_forecast.models_configs.BaseModelConfig": [[5, 3, 1, "", "SUPPORTED_PARAMETERS"]], "iowa_forecast.plots": [[6, 1, 1, "", "convert_to_datetime"], [6, 1, 1, "", "filter_by_date"], [6, 1, 1, "", "plot_historical_and_forecast"], [6, 1, 1, "", "plot_series"]], "iowa_forecast.utils": [[7, 1, 1, "", "create_bigquery_table_from_pandas"], [7, 1, 1, "", "create_dataset_if_not_found"], [7, 1, 1, "", "create_date_offset_from_parts"], [7, 1, 1, "", "date_offset"], [7, 1, 1, "", "list_tables_with_pattern"], [7, 1, 1, "", "normalize_item_name"], [7, 1, 1, "", "parse_combined_string"], [7, 1, 1, "", "split_table_name_info"]]}, "objtypes": {"0": "py:module", "1": "py:function", "2": "py:class", "3": "py:property"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "function", "Python function"], "2": ["py", "class", "Python class"], "3": ["py", "property", "Python property"]}, "titleterms": {"api": 0, "refer": 0, "iowa_forecast": [0, 1], "load_data": 2, "function": [2, 3, 4, 6], "note": [2, 3, 4, 6, 7], "exampl": [2, 4, 5, 6, 7], "ml_eval": 3, "ml_train": 4, "plot": 6, "util": 7, "iowa": 9, "liquor": 9, "sale": 9, "forecast": 9, "modul": 9, "content": 9, "indic": 9, "tabl": 9, "instal": 10, "requir": 10, "python": 10, "depend": 10, "googl": 10, "cloud": 10, "setup": 10, "docker": 10, "option": 10, "run": 10, "project": 10, "troubleshoot": 10, "models_config": 5, "class": 5, "usag": 5}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx.ext.intersphinx": 1, "sphinx": 58}, "alltitles": {"API Reference": [[0, 
"api-reference"]], "iowa_forecast": [[0, "iowa-forecast"], [1, "iowa-forecast"]], "load_data": [[2, "module-iowa_forecast.load_data"]], "Functions": [[2, "functions"], [3, "functions"], [4, "functions"], [6, "functions"]], "Notes": [[2, null], [2, null], [3, null], [4, null], [4, null], [4, null], [6, null], [6, null], [6, null], [7, null], [7, null], [7, null]], "Examples": [[2, null], [2, null], [4, null], [6, null], [6, null], [6, null], [7, null], [7, null], [7, null], [7, null], [7, null]], "ml_eval": [[3, "module-iowa_forecast.ml_eval"]], "ml_train": [[4, "module-iowa_forecast.ml_train"]], "models_configs": [[5, "module-iowa_forecast.models_configs"]], "Classes": [[5, "classes"]], "Usage": [[5, "usage"]], "Example": [[5, null]], "plots": [[6, "module-iowa_forecast.plots"]], "utils": [[7, "module-iowa_forecast.utils"]], "Iowa Liquor Sales Forecast": [[9, "iowa-liquor-sales-forecast"]], "Modules": [[9, "modules"]], "Contents:": [[9, null]], "Indices and tables": [[9, "indices-and-tables"]], "Installation": [[10, "installation"]], "Requirements": [[10, "requirements"]], "Python Dependencies": [[10, "python-dependencies"]], "Google Cloud Setup": [[10, "google-cloud-setup"]], "Docker Setup (Optional)": [[10, "docker-setup-optional"]], "Running the Project": [[10, "running-the-project"]], "Troubleshooting": [[10, "troubleshooting"]]}, "indexentries": {"create_forecast_features_query() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.create_forecast_features_query"]], "create_future_data() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.create_future_data"]], "create_future_feature_table() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.create_future_feature_table"]], "create_future_feature_tables() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.create_future_feature_tables"]], "get_item_names_filter() (in module iowa_forecast.load_data)": [[2, 
"iowa_forecast.load_data.get_item_names_filter"]], "get_min_datapoints_filter() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.get_min_datapoints_filter"]], "get_training_data() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.get_training_data"]], "get_weather_query() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.get_weather_query"]], "get_year_weather_query() (in module iowa_forecast.load_data)": [[2, "iowa_forecast.load_data.get_year_weather_query"]], "iowa_forecast.load_data": [[2, "module-iowa_forecast.load_data"]], "module": [[2, "module-iowa_forecast.load_data"], [3, "module-iowa_forecast.ml_eval"], [4, "module-iowa_forecast.ml_train"], [5, "module-iowa_forecast.models_configs"], [6, "module-iowa_forecast.plots"], [7, "module-iowa_forecast.utils"]], "create_query() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.create_query"]], "evaluate_models() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.evaluate_models"]], "evaluate_predictions() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.evaluate_predictions"]], "explain_model() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.explain_model"]], "get_actual_data() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.get_actual_data"]], "get_data() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.get_data"]], "get_predictions() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.get_predictions"]], "get_train_data() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.get_train_data"]], "iowa_forecast.ml_eval": [[3, "module-iowa_forecast.ml_eval"]], "multi_evaluate_predictions() (in module iowa_forecast.ml_eval)": [[3, "iowa_forecast.ml_eval.multi_evaluate_predictions"]], "create_model_query() (in module iowa_forecast.ml_train)": [[4, "iowa_forecast.ml_train.create_model_query"]], "create_models_for_items() (in module 
iowa_forecast.ml_train)": [[4, "iowa_forecast.ml_train.create_models_for_items"]], "execute_query_with_retries() (in module iowa_forecast.ml_train)": [[4, "iowa_forecast.ml_train.execute_query_with_retries"]], "include_test_on_arima_model_train() (in module iowa_forecast.ml_train)": [[4, "iowa_forecast.ml_train.include_test_on_arima_model_train"]], "include_test_on_model_train() (in module iowa_forecast.ml_train)": [[4, "iowa_forecast.ml_train.include_test_on_model_train"]], "iowa_forecast.ml_train": [[4, "module-iowa_forecast.ml_train"]], "train_arima_models() (in module iowa_forecast.ml_train)": [[4, "iowa_forecast.ml_train.train_arima_models"]], "arimaconfig (class in iowa_forecast.models_configs)": [[5, "iowa_forecast.models_configs.ARIMAConfig"]], "arima_plus_xreg_config (class in iowa_forecast.models_configs)": [[5, "iowa_forecast.models_configs.ARIMA_PLUS_XREG_Config"]], "abstractbasemodelconfig (class in iowa_forecast.models_configs)": [[5, "iowa_forecast.models_configs.AbstractBaseModelConfig"]], "basemodelconfig (class in iowa_forecast.models_configs)": [[5, "iowa_forecast.models_configs.BaseModelConfig"]], "supported_parameters (iowa_forecast.models_configs.arimaconfig property)": [[5, "iowa_forecast.models_configs.ARIMAConfig.SUPPORTED_PARAMETERS"]], "supported_parameters (iowa_forecast.models_configs.arima_plus_xreg_config property)": [[5, "iowa_forecast.models_configs.ARIMA_PLUS_XREG_Config.SUPPORTED_PARAMETERS"]], "supported_parameters (iowa_forecast.models_configs.abstractbasemodelconfig property)": [[5, "iowa_forecast.models_configs.AbstractBaseModelConfig.SUPPORTED_PARAMETERS"]], "supported_parameters (iowa_forecast.models_configs.basemodelconfig property)": [[5, "iowa_forecast.models_configs.BaseModelConfig.SUPPORTED_PARAMETERS"]], "iowa_forecast.models_configs": [[5, "module-iowa_forecast.models_configs"]], "convert_to_datetime() (in module iowa_forecast.plots)": [[6, "iowa_forecast.plots.convert_to_datetime"]], "filter_by_date() (in module 
iowa_forecast.plots)": [[6, "iowa_forecast.plots.filter_by_date"]], "iowa_forecast.plots": [[6, "module-iowa_forecast.plots"]], "plot_historical_and_forecast() (in module iowa_forecast.plots)": [[6, "iowa_forecast.plots.plot_historical_and_forecast"]], "plot_series() (in module iowa_forecast.plots)": [[6, "iowa_forecast.plots.plot_series"]], "create_bigquery_table_from_pandas() (in module iowa_forecast.utils)": [[7, "iowa_forecast.utils.create_bigquery_table_from_pandas"]], "create_dataset_if_not_found() (in module iowa_forecast.utils)": [[7, "iowa_forecast.utils.create_dataset_if_not_found"]], "create_date_offset_from_parts() (in module iowa_forecast.utils)": [[7, "iowa_forecast.utils.create_date_offset_from_parts"]], "date_offset() (in module iowa_forecast.utils)": [[7, "iowa_forecast.utils.date_offset"]], "iowa_forecast.utils": [[7, "module-iowa_forecast.utils"]], "list_tables_with_pattern() (in module iowa_forecast.utils)": [[7, "iowa_forecast.utils.list_tables_with_pattern"]], "normalize_item_name() (in module iowa_forecast.utils)": [[7, "iowa_forecast.utils.normalize_item_name"]], "parse_combined_string() (in module iowa_forecast.utils)": [[7, "iowa_forecast.utils.parse_combined_string"]], "split_table_name_info() (in module iowa_forecast.utils)": [[7, "iowa_forecast.utils.split_table_name_info"]]}}) \ No newline at end of file