Commit

Merge branch 'development' into update_generel_logger
Polichinel committed Oct 31, 2024
2 parents e962c1c + 9ac24f0 commit 6d0db3c
Showing 11 changed files with 589 additions and 69 deletions.
41 changes: 0 additions & 41 deletions common_querysets/queryset_meow_meow.py

This file was deleted.

40 changes: 36 additions & 4 deletions meta_tools/model_scaffold_builder.py
@@ -25,6 +25,14 @@
template_config_meta,
template_config_sweep,
template_main,
template_get_data,
template_execute_model_runs,
template_execute_model_tasks,
template_evaluate_model,
template_evaluate_sweep,
template_train_model,
template_utils_run,
template_generate_forecast
)

logging.basicConfig(level=logging.INFO)
@@ -152,15 +160,15 @@ def build_model_scripts(self):
f"Model directory {self._model.model_dir} does not exist. Please call build_model_directory() first. Aborting script generation."
)
template_config_deployment.generate(
-    script_dir=self._model.model_dir / "configs/config_deployment.py"
+    script_dir=self._model.configs / "config_deployment.py"
)
self._model_algorithm = str(
input(
"Enter the algorithm of the model (e.g. XGBoost, LightBGM, HydraNet): "
)
)
template_config_hyperparameters.generate(
-    script_dir=self._model.model_dir / "configs/config_hyperparameters.py",
+    script_dir=self._model.configs / "config_hyperparameters.py",
model_algorithm=self._model_algorithm,
)
template_config_input_data.generate(
@@ -169,15 +177,39 @@ def build_model_scripts(self):
model_name=self._model.model_name,
)
template_config_meta.generate(
-    script_dir=self._model.model_dir / "configs/config_meta.py",
+    script_dir=self._model.configs / "config_meta.py",
model_name=self._model.model_name,
model_algorithm=self._model_algorithm,
)
template_config_sweep.generate(
-    script_dir=self._model.model_dir / "configs/config_sweep.py",
+    script_dir=self._model.configs / "config_sweep.py",
model_algorithm=self._model_algorithm,
)
template_main.generate(script_dir=self._model.model_dir / "main.py")
template_get_data.generate(script_dir=self._model.dataloaders / "get_data.py")
template_generate_forecast.generate(
script_dir=self._model.forecasting / "generate_forecast.py"
)
template_execute_model_runs.generate(
script_dir=self._model.management / "execute_model_runs.py")
template_execute_model_tasks.generate(
script_dir=self._model.management / "execute_model_tasks.py"
)
template_evaluate_model.generate(
script_dir=self._model.offline_evaluation / "evaluate_model.py"
)
template_evaluate_sweep.generate(
script_dir=self._model.offline_evaluation / "evaluate_sweep.py"
)
template_train_model.generate(
script_dir=self._model.training / "train_model.py"
)
template_utils_run.generate(
script_dir=self._model.utils / "utils_run.py"
)
# INFO: utils_outputs.py was not templated because it will probably be moved to common_utils in the future.
logging.info(f"Remember to update the queryset file at {self._model.queryset_path}!")


def assess_model_directory(self) -> dict:
"""
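Each of the template modules invoked above follows the same pattern: its generate() formats a code string and delegates the actual write to utils_script_gen.save_script, which is not part of this diff. A minimal sketch of what such a helper could look like, assuming it simply creates the parent folder and writes the file (the real meta_tools utility may differ):

from pathlib import Path


def save_script(script_dir: Path, code: str) -> bool:
    """Hypothetical sketch: write a generated code string to the given file path."""
    try:
        # Despite the parameter name, script_dir is a full file path in these templates,
        # e.g. self._model.utils / "utils_run.py", so create its parent directory.
        script_dir.parent.mkdir(parents=True, exist_ok=True)
        script_dir.write_text(code, encoding="utf-8")
        return True
    except OSError as exc:
        print(f"Could not write {script_dir}: {exc}")
        return False
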
76 changes: 76 additions & 0 deletions meta_tools/templates/model/template_evaluate_model.py
@@ -0,0 +1,76 @@
from utils import utils_script_gen
from pathlib import Path


def generate(script_dir: Path) -> bool:
"""
Generates a Python script with a predefined template and saves it to the specified directory.
The generated script includes a function to evaluate a model artifact. It handles loading the model,
making predictions, standardizing the data, generating evaluation metrics, and saving the outputs.
It also logs relevant information using Weights & Biases (wandb).
Args:
script_dir (Path): The directory where the generated script will be saved.
Returns:
bool: True if the script was successfully saved, False otherwise.
"""

code = f"""from datetime import datetime
import pandas as pd
import logging
from model_path import ModelPath
from utils_log_files import create_log_file, read_log_file
from utils_outputs import save_model_outputs, save_predictions
from utils_run import get_standardized_df
from utils_artifacts import get_latest_model_artifact
from utils_evaluation_metrics import generate_metric_dict
from utils_model_outputs import generate_output_dict
from utils_wandb import log_wandb_log_dict
from views_forecasts.extensions import *
logger = logging.getLogger(__name__)
def evaluate_model_artifact(config, artifact_name):
model_path = ModelPath(config["name"])
path_raw = model_path.data_raw
path_generated = model_path.data_generated
path_artifacts = model_path.artifacts
run_type = config["run_type"]
# if an artifact name is provided through the CLI, use it.
# Otherwise, get the latest model artifact based on the run type
if artifact_name:
logger.info(f"Using (non-default) artifact: {{artifact_name}}")
if not artifact_name.endswith(".pkl"):
artifact_name += ".pkl"
PATH_ARTIFACT = path_artifacts / artifact_name
else:
# use the latest model artifact based on the run type
logger.info(f"Using latest (default) run type ({{run_type}}) specific artifact")
PATH_ARTIFACT = get_latest_model_artifact(path_artifacts, run_type)
config["timestamp"] = PATH_ARTIFACT.stem[-15:]
df_viewser = pd.read_pickle(path_raw / f"{{run_type}}_viewser_df.pkl")
try:
stepshift_model = pd.read_pickle(PATH_ARTIFACT)
except FileNotFoundError:
logger.exception(f"Model artifact not found at {{PATH_ARTIFACT}}")
df = stepshift_model.predict(run_type, df_viewser)
df = get_standardized_df(df, config)
data_generation_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
date_fetch_timestamp = read_log_file(path_raw / f"{{run_type}}_data_fetch_log.txt").get("Data Fetch Timestamp", None)
_, df_output = generate_output_dict(df, config)
evaluation, df_evaluation = generate_metric_dict(df, config)
log_wandb_log_dict(config, evaluation)
save_model_outputs(df_evaluation, df_output, path_generated, config)
save_predictions(df, path_generated, config)
create_log_file(path_generated, config, config["timestamp"], data_generation_timestamp, date_fetch_timestamp)
"""
return utils_script_gen.save_script(script_dir, code)
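
Note on the brace escaping used throughout these templates: the outer f-string contains no live placeholders, so every doubled brace collapses to a single brace when the template is evaluated, leaving a working f-string in the generated script. A small self-contained illustration, mimicking one line of the template above:

# The template line (an f-string at generation time) ...
template_line = f"""logger.info(f"Using (non-default) artifact: {{artifact_name}}")"""
# ... is emitted into the generated evaluate_model.py with single braces,
# i.e. as a live placeholder for the generated script to resolve at runtime:
print(template_line)
# logger.info(f"Using (non-default) artifact: {artifact_name}")
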
49 changes: 49 additions & 0 deletions meta_tools/templates/model/template_evaluate_sweep.py
@@ -0,0 +1,49 @@
from utils import utils_script_gen
from pathlib import Path


def generate(script_dir: Path) -> bool:
"""
Generates a Python script with a predefined template and saves it to the specified directory.
The generated script includes a function to evaluate a sweep of model runs. It handles loading the model,
making predictions, standardizing the data, calculating the mean squared error (MSE), generating evaluation metrics,
and logging the results using Weights & Biases (wandb).
Args:
script_dir (Path): The directory where the generated script will be saved.
Returns:
bool: True if the script was successfully saved, False otherwise.
"""

code = f"""import pandas as pd
import wandb
from sklearn.metrics import mean_squared_error
from model_path import ModelPath
from utils_run import get_standardized_df
from utils_wandb import log_wandb_log_dict
from utils_evaluation_metrics import generate_metric_dict
def evaluate_sweep(config, stepshift_model):
model_path = ModelPath(config["name"])
path_raw = model_path.data_raw
run_type = config["run_type"]
steps = config["steps"]
df_viewser = pd.read_pickle(path_raw / f"{{{{run_type}}}}_viewser_df.pkl")
df = stepshift_model.predict(run_type, df_viewser)
df = get_standardized_df(df, config)
# Temporarily keep this because the metric to minimize is MSE
pred_cols = [f"step_pred_{{{{str(i)}}}}" for i in steps]
df["mse"] = df.apply(lambda row: mean_squared_error([row[config["depvar"]]] * 36,
[row[col] for col in pred_cols]), axis=1)
wandb.log({{"MSE": df["mse"].mean()}})
evaluation, _ = generate_metric_dict(df, config)
log_wandb_log_dict(config, evaluation)
"""
return utils_script_gen.save_script(script_dir, code)
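
For reference, the row-wise MSE that the generated evaluate_sweep.py minimizes compares each row's observed value, repeated once per forecast step, against that row's step predictions. A toy, self-contained version with hypothetical column names (the real template hard-codes 36 repeats, presumably matching a 36-step forecast horizon):

import pandas as pd
from sklearn.metrics import mean_squared_error

steps = [1, 2, 3]  # toy horizon; the template above assumes 36 steps
df = pd.DataFrame({
    "target": [0.0, 2.0],       # hypothetical dependent variable
    "step_pred_1": [0.1, 1.8],
    "step_pred_2": [0.0, 2.2],
    "step_pred_3": [0.2, 1.9],
})
pred_cols = [f"step_pred_{i}" for i in steps]
# Repeat the observed value once per prediction so both vectors have equal length.
df["mse"] = df.apply(
    lambda row: mean_squared_error([row["target"]] * len(pred_cols),
                                   [row[col] for col in pred_cols]),
    axis=1,
)
print(df["mse"].mean())  # the scalar a sweep would log and minimize
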
59 changes: 59 additions & 0 deletions meta_tools/templates/model/template_execute_model_runs.py
@@ -0,0 +1,59 @@
from utils import utils_script_gen
from pathlib import Path


def generate(script_dir: Path) -> bool:
"""
Generates a Python script with a predefined template and saves it to the specified directory.
The generated script includes functions to execute model runs, either as a sweep or a single run.
It uses configurations for deployment, hyperparameters, meta, and sweep, and integrates with Weights & Biases (wandb) for experiment tracking.
Args:
script_dir (Path): The directory where the generated script will be saved.
Returns:
bool: True if the script was successfully saved, False otherwise.
"""

code = f"""import wandb
from config_deployment import get_deployment_config
from config_hyperparameters import get_hp_config
from config_meta import get_meta_config
from config_sweep import get_sweep_config
from execute_model_tasks import execute_model_tasks
from get_data import get_data
from utils_run import update_config, update_sweep_config
def execute_sweep_run(args):
sweep_config = get_sweep_config()
meta_config = get_meta_config()
update_sweep_config(sweep_config, args, meta_config)
get_data(args, sweep_config["name"])
project = f"{{sweep_config['name']}}_sweep" # we can name the sweep in the config file
sweep_id = wandb.sweep(sweep_config, project=project, entity="views_pipeline")
wandb.agent(sweep_id, execute_model_tasks, entity="views_pipeline")
def execute_single_run(args):
hp_config = get_hp_config()
meta_config = get_meta_config()
dp_config = get_deployment_config()
config = update_config(hp_config, meta_config, dp_config, args)
get_data(args, config["name"])
project = f"{{config['name']}}_{{args.run_type}}"
if args.run_type == "calibration" or args.run_type == "testing":
execute_model_tasks(config=config, project=project, train=args.train, eval=args.evaluate,
forecast=False, artifact_name=args.artifact_name)
elif args.run_type == "forecasting":
execute_model_tasks(config=config, project=project, train=args.train, eval=False,
forecast=args.forecast, artifact_name=args.artifact_name)
"""
return utils_script_gen.save_script(script_dir, code)
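
The args object consumed here is expected to come from the CLI that template_main generates (main.py is scaffolded above, but its template is not shown in this diff). A hypothetical sketch of a wrapper that would supply the attributes execute_single_run and execute_sweep_run read; the flag names are assumptions:

import argparse

from execute_model_runs import execute_single_run, execute_sweep_run

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--run_type", choices=["calibration", "testing", "forecasting"],
                        default="calibration")
    parser.add_argument("--sweep", action="store_true")
    parser.add_argument("--train", action="store_true")
    parser.add_argument("--evaluate", action="store_true")
    parser.add_argument("--forecast", action="store_true")
    parser.add_argument("--artifact_name", default=None)
    args = parser.parse_args()
    # Sweeps go through wandb.agent; everything else is a single tracked run.
    execute_sweep_run(args) if args.sweep else execute_single_run(args)
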
91 changes: 91 additions & 0 deletions meta_tools/templates/model/template_execute_model_tasks.py
@@ -0,0 +1,91 @@
from utils import utils_script_gen
from pathlib import Path


def generate(script_dir: Path) -> bool:
"""
Generates a Python script with a predefined template and saves it to the specified directory.
The generated script includes a function to execute various model-related tasks such as training,
evaluation, and forecasting. It integrates with Weights & Biases (wandb) for experiment tracking
and logging.
Args:
script_dir (Path): The directory where the generated script will be saved.
Returns:
bool: True if the script was successfully saved, False otherwise.
"""

code = f"""import wandb
import logging
import time
from evaluate_model import evaluate_model_artifact
from evaluate_sweep import evaluate_sweep
from generate_forecast import forecast_model_artifact
from train_model import train_model_artifact
from utils_run import split_hurdle_parameters
from utils_wandb import add_wandb_monthly_metrics
logger = logging.getLogger(__name__)
def execute_model_tasks(config=None, project=None, train=None, eval=None, forecast=None, artifact_name=None):
\"""
Executes various model-related tasks including training, evaluation, and forecasting.
This function manages the execution of different tasks such as training the model,
evaluating an existing model, or performing forecasting.
It also initializes the WandB project.
Args:
config: Configuration object containing parameters and settings.
project: The WandB project name.
train: Flag to indicate if the model should be trained.
eval: Flag to indicate if the model should be evaluated.
forecast: Flag to indicate if forecasting should be performed.
artifact_name (optional): Specific name of the model artifact to load for evaluation or forecasting.
\"""
start_t = time.time()
# Initialize WandB
with wandb.init(project=project, entity="views_pipeline",
config=config): # project and config ignored when running a sweep
# add the monthly metrics to WandB
add_wandb_monthly_metrics()
# Update config from WandB initialization above
config = wandb.config
# W&B does not directly support nested dictionaries for hyperparameters
# This will make the sweep config super ugly, but we don't have to distinguish between sweep and single runs
if config["sweep"] and config["algorithm"] == "HurdleRegression":
config["parameters"] = {{}}
config["parameters"]["clf"], config["parameters"]["reg"] = split_hurdle_parameters(config)
if config["sweep"]:
logger.info(f"Sweeping model {{config['name']}}...")
stepshift_model = train_model_artifact(config)
logger.info(f"Evaluating model {{config['name']}}...")
evaluate_sweep(config, stepshift_model)
# Handle the single model runs: train and save the model as an artifact
if train:
logger.info(f"Training model {{config['name']}}...")
train_model_artifact(config)
# Handle the single model runs: evaluate a trained model (artifact)
if eval:
logger.info(f"Evaluating model {{config['name']}}...")
evaluate_model_artifact(config, artifact_name)
if forecast:
logger.info(f"Forecasting model {{config['name']}}...")
forecast_model_artifact(config, artifact_name)
end_t = time.time()
minutes = (end_t - start_t) / 60
logger.info(f"Done. Runtime: {{minutes:.3f}} minutes.\\n")
"""
return utils_script_gen.save_script(script_dir, code)
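
The HurdleRegression branch above relies on split_hurdle_parameters from utils_run, which is not shown in this diff. As an illustration only, a splitter of this kind could unflatten prefixed sweep keys back into the classifier/regressor sub-dictionaries that W&B cannot nest; the clf_/reg_ prefix convention below is an assumption:

def split_hurdle_parameters(config):
    """Hypothetical sketch: split flat keys like 'clf_n_estimators' and
    'reg_learning_rate' into separate classifier and regressor parameter dicts."""
    clf, reg = {}, {}
    for key, value in dict(config).items():
        if key.startswith("clf_"):
            clf[key[len("clf_"):]] = value
        elif key.startswith("reg_"):
            reg[key[len("reg_"):]] = value
    return clf, reg

# Example: {"clf_n_estimators": 200, "reg_learning_rate": 0.05}
# -> ({"n_estimators": 200}, {"learning_rate": 0.05})
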
