Commit
Merge branch 'development' into update_generel_logger
Showing 11 changed files with 589 additions and 69 deletions.
@@ -0,0 +1,76 @@
from utils import utils_script_gen
from pathlib import Path


def generate(script_dir: Path) -> bool:
    """
    Generates a Python script with a predefined template and saves it to the specified directory.

    The generated script includes a function to evaluate a model artifact. It handles loading the model,
    making predictions, standardizing the data, generating evaluation metrics, and saving the outputs.
    It also logs relevant information using Weights & Biases (wandb).

    Args:
        script_dir (Path): The directory where the generated script will be saved.

    Returns:
        bool: True if the script was successfully saved, False otherwise.
    """
    code = f"""from datetime import datetime
import pandas as pd
import logging

from model_path import ModelPath
from utils_log_files import create_log_file, read_log_file
from utils_outputs import save_model_outputs, save_predictions
from utils_run import get_standardized_df
from utils_artifacts import get_latest_model_artifact
from utils_evaluation_metrics import generate_metric_dict
from utils_model_outputs import generate_output_dict
from utils_wandb import log_wandb_log_dict
from views_forecasts.extensions import *

logger = logging.getLogger(__name__)


def evaluate_model_artifact(config, artifact_name):
    model_path = ModelPath(config["name"])
    path_raw = model_path.data_raw
    path_generated = model_path.data_generated
    path_artifacts = model_path.artifacts
    run_type = config["run_type"]

    # If an artifact name is provided through the CLI, use it.
    # Otherwise, get the latest model artifact based on the run type.
    if artifact_name:
        logger.info(f"Using (non-default) artifact: {{artifact_name}}")
        if not artifact_name.endswith(".pkl"):
            artifact_name += ".pkl"
        PATH_ARTIFACT = path_artifacts / artifact_name
    else:
        # Use the latest model artifact based on the run type.
        logger.info(f"Using latest (default) run type ({{run_type}}) specific artifact")
        PATH_ARTIFACT = get_latest_model_artifact(path_artifacts, run_type)

    config["timestamp"] = PATH_ARTIFACT.stem[-15:]
    df_viewser = pd.read_pickle(path_raw / f"{{run_type}}_viewser_df.pkl")

    try:
        stepshift_model = pd.read_pickle(PATH_ARTIFACT)
    except FileNotFoundError:
        logger.exception(f"Model artifact not found at {{PATH_ARTIFACT}}")
        raise  # re-raise: without the artifact there is nothing to evaluate

    df = stepshift_model.predict(run_type, df_viewser)
    df = get_standardized_df(df, config)
    data_generation_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    date_fetch_timestamp = read_log_file(path_raw / f"{{run_type}}_data_fetch_log.txt").get("Data Fetch Timestamp", None)

    _, df_output = generate_output_dict(df, config)
    evaluation, df_evaluation = generate_metric_dict(df, config)
    log_wandb_log_dict(config, evaluation)

    save_model_outputs(df_evaluation, df_output, path_generated, config)
    save_predictions(df, path_generated, config)
    create_log_file(path_generated, config, config["timestamp"], data_generation_timestamp, date_fetch_timestamp)
"""
    return utils_script_gen.save_script(script_dir, code)
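
For context, a minimal sketch of how a generator like this might be driven from the meta_tools scaffolding; the module name template_evaluate_model and the src/offline_evaluation target directory are illustrative assumptions, and only generate(script_dir) with its boolean return value comes from the code above.

from pathlib import Path

import template_evaluate_model  # hypothetical module name for the generator shown above


def build_evaluation_script(model_dir: Path) -> bool:
    # Assumed layout: the generated evaluate_model script lives under the model's src tree.
    script_dir = model_dir / "src" / "offline_evaluation"
    script_dir.mkdir(parents=True, exist_ok=True)
    # generate() renders the template and returns True if save_script succeeded.
    return template_evaluate_model.generate(script_dir)


if __name__ == "__main__":
    print(build_evaluation_script(Path("models") / "example_model"))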
@@ -0,0 +1,49 @@
from utils import utils_script_gen
from pathlib import Path


def generate(script_dir: Path) -> bool:
    """
    Generates a Python script with a predefined template and saves it to the specified directory.

    The generated script includes a function to evaluate a sweep of model runs. It handles loading the model,
    making predictions, standardizing the data, calculating the mean squared error (MSE), generating evaluation
    metrics, and logging the results using Weights & Biases (wandb).

    Args:
        script_dir (Path): The directory where the generated script will be saved.

    Returns:
        bool: True if the script was successfully saved, False otherwise.
    """
    code = f"""import pandas as pd
import wandb
from sklearn.metrics import mean_squared_error

from model_path import ModelPath
from utils_run import get_standardized_df
from utils_wandb import log_wandb_log_dict
from utils_evaluation_metrics import generate_metric_dict


def evaluate_sweep(config, stepshift_model):
    model_path = ModelPath(config["name"])
    path_raw = model_path.data_raw
    run_type = config["run_type"]
    steps = config["steps"]

    df_viewser = pd.read_pickle(path_raw / f"{{run_type}}_viewser_df.pkl")
    df = stepshift_model.predict(run_type, df_viewser)
    df = get_standardized_df(df, config)

    # Temporarily keep this because the metric to minimize is MSE
    pred_cols = [f"step_pred_{{str(i)}}" for i in steps]
    df["mse"] = df.apply(lambda row: mean_squared_error([row[config["depvar"]]] * 36,
                                                        [row[col] for col in pred_cols]), axis=1)
    wandb.log({{"MSE": df["mse"].mean()}})

    evaluation, _ = generate_metric_dict(df, config)
    log_wandb_log_dict(config, evaluation)
"""
    return utils_script_gen.save_script(script_dir, code)
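
To make the per-row MSE explicit: the generated snippet repeats the observed dependent variable once per forecast step (hard-coded to 36 in the template) and scores it against the step predictions. A toy version with three steps and made-up column names:

import pandas as pd
from sklearn.metrics import mean_squared_error

config = {"depvar": "ged_sb", "steps": [1, 2, 3]}  # illustrative names, not from the template
df = pd.DataFrame({
    "ged_sb": [0.0, 2.0],
    "step_pred_1": [0.0, 1.0],
    "step_pred_2": [0.5, 2.0],
    "step_pred_3": [0.0, 3.0],
})

pred_cols = [f"step_pred_{i}" for i in config["steps"]]
# Each row's observed value is repeated once per step and compared against that row's step predictions.
df["mse"] = df.apply(
    lambda row: mean_squared_error([row[config["depvar"]]] * len(pred_cols),
                                   [row[col] for col in pred_cols]),
    axis=1,
)
print(df["mse"].tolist())  # approx [0.083, 0.667]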
@@ -0,0 +1,59 @@
from utils import utils_script_gen
from pathlib import Path


def generate(script_dir: Path) -> bool:
    """
    Generates a Python script with a predefined template and saves it to the specified directory.

    The generated script includes functions to execute model runs, either as a sweep or a single run.
    It uses configurations for deployment, hyperparameters, meta, and sweep, and integrates with
    Weights & Biases (wandb) for experiment tracking.

    Args:
        script_dir (Path): The directory where the generated script will be saved.

    Returns:
        bool: True if the script was successfully saved, False otherwise.
    """
    code = f"""import wandb

from config_deployment import get_deployment_config
from config_hyperparameters import get_hp_config
from config_meta import get_meta_config
from config_sweep import get_sweep_config
from execute_model_tasks import execute_model_tasks
from get_data import get_data
from utils_run import update_config, update_sweep_config


def execute_sweep_run(args):
    sweep_config = get_sweep_config()
    meta_config = get_meta_config()
    update_sweep_config(sweep_config, args, meta_config)

    get_data(args, sweep_config["name"])

    project = f"{{sweep_config['name']}}_sweep"  # we can name the sweep in the config file
    sweep_id = wandb.sweep(sweep_config, project=project, entity="views_pipeline")
    wandb.agent(sweep_id, execute_model_tasks, entity="views_pipeline")


def execute_single_run(args):
    hp_config = get_hp_config()
    meta_config = get_meta_config()
    dp_config = get_deployment_config()
    config = update_config(hp_config, meta_config, dp_config, args)

    get_data(args, config["name"])

    project = f"{{config['name']}}_{{args.run_type}}"

    if args.run_type == "calibration" or args.run_type == "testing":
        execute_model_tasks(config=config, project=project, train=args.train, eval=args.evaluate,
                            forecast=False, artifact_name=args.artifact_name)
    elif args.run_type == "forecasting":
        execute_model_tasks(config=config, project=project, train=args.train, eval=False,
                            forecast=args.forecast, artifact_name=args.artifact_name)
"""
    return utils_script_gen.save_script(script_dir, code)
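
As a usage sketch, the generated runner only relies on an args object exposing run_type, train, evaluate, forecast, and artifact_name; the argparse wiring below is an assumption about how such an object could be built (the --sweep flag and the defaults are illustrative, not taken from the code above).

import argparse

parser = argparse.ArgumentParser(description="Drive execute_single_run / execute_sweep_run")
parser.add_argument("--run_type", choices=["calibration", "testing", "forecasting"], default="calibration")
parser.add_argument("--sweep", action="store_true")
parser.add_argument("--train", action="store_true")
parser.add_argument("--evaluate", action="store_true")
parser.add_argument("--forecast", action="store_true")
parser.add_argument("--artifact_name", default=None)
args = parser.parse_args(["--run_type", "calibration", "--train", "--evaluate"])

# With these flags, execute_single_run(args) would train a calibration model and then evaluate
# the resulting artifact; execute_sweep_run(args) would be chosen when args.sweep is set.
print(args)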
meta_tools/templates/model/template_execute_model_tasks.py (91 additions, 0 deletions)
@@ -0,0 +1,91 @@
from utils import utils_script_gen
from pathlib import Path


def generate(script_dir: Path) -> bool:
    """
    Generates a Python script with a predefined template and saves it to the specified directory.

    The generated script includes a function to execute various model-related tasks such as training,
    evaluation, and forecasting. It integrates with Weights & Biases (wandb) for experiment tracking
    and logging.

    Args:
        script_dir (Path): The directory where the generated script will be saved.

    Returns:
        bool: True if the script was successfully saved, False otherwise.
    """
    code = f"""import wandb
import logging
import time

from evaluate_model import evaluate_model_artifact
from evaluate_sweep import evaluate_sweep
from generate_forecast import forecast_model_artifact
from train_model import train_model_artifact
from utils_run import split_hurdle_parameters
from utils_wandb import add_wandb_monthly_metrics

logger = logging.getLogger(__name__)


def execute_model_tasks(config=None, project=None, train=None, eval=None, forecast=None, artifact_name=None):
    \"""
    Executes various model-related tasks including training, evaluation, and forecasting.

    This function manages the execution of different tasks such as training the model,
    evaluating an existing model, or performing forecasting.
    It also initializes the WandB project.

    Args:
        config: Configuration object containing parameters and settings.
        project: The WandB project name.
        train: Flag to indicate if the model should be trained.
        eval: Flag to indicate if the model should be evaluated.
        forecast: Flag to indicate if forecasting should be performed.
        artifact_name (optional): Specific name of the model artifact to load for evaluation or forecasting.
    \"""
    start_t = time.time()

    # Initialize WandB
    with wandb.init(project=project, entity="views_pipeline",
                    config=config):  # project and config ignored when running a sweep

        # Add the monthly metrics to WandB
        add_wandb_monthly_metrics()

        # Update config from WandB initialization above
        config = wandb.config

        # W&B does not directly support nested dictionaries for hyperparameters.
        # This will make the sweep config super ugly, but we don't have to distinguish between sweep and single runs.
        if config["sweep"] and config["algorithm"] == "HurdleRegression":
            config["parameters"] = {{}}
            config["parameters"]["clf"], config["parameters"]["reg"] = split_hurdle_parameters(config)

        if config["sweep"]:
            logger.info(f"Sweeping model {{config['name']}}...")
            stepshift_model = train_model_artifact(config)
            logger.info(f"Evaluating model {{config['name']}}...")
            evaluate_sweep(config, stepshift_model)

        # Handle the single model runs: train and save the model as an artifact
        if train:
            logger.info(f"Training model {{config['name']}}...")
            train_model_artifact(config)

        # Handle the single model runs: evaluate a trained model (artifact)
        if eval:
            logger.info(f"Evaluating model {{config['name']}}...")
            evaluate_model_artifact(config, artifact_name)

        if forecast:
            logger.info(f"Forecasting model {{config['name']}}...")
            forecast_model_artifact(config, artifact_name)

        end_t = time.time()
        minutes = (end_t - start_t) / 60
        logger.info(f"Done. Runtime: {{minutes:.3f}} minutes.\\n")
"""
    return utils_script_gen.save_script(script_dir, code)
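
The nested-hyperparameter comment suggests that sweep configs flatten the HurdleRegression classifier and regressor settings into prefixed keys. A plausible sketch of what split_hurdle_parameters could look like under that assumption; the real helper lives in utils_run and may differ.

def split_hurdle_parameters(config):
    # Assumed convention: flattened sweep keys such as "clf_n_estimators" or "reg_max_depth"
    # are regrouped into separate dicts for the classification and regression stages.
    clf_params, reg_params = {}, {}
    for key, value in dict(config).items():
        if key.startswith("clf_"):
            clf_params[key[len("clf_"):]] = value
        elif key.startswith("reg_"):
            reg_params[key[len("reg_"):]] = value
    return clf_params, reg_params


# Example: {"clf_n_estimators": 200, "reg_max_depth": 6} -> ({"n_estimators": 200}, {"max_depth": 6})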