-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add new Path solution to ESCWA model #23
base: production
Are you sure you want to change the base?
Changes from 14 commits
261cbc5
97da312
91da48d
a95438e
d426c29
77bc3a7
69ede05
067f6e9
cb09a90
1ac2ec4
8223931
c109228
fdb3f2c
2031c08
c1b655a
a8f05f0
c9011f1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
evaluation_metrics = {'Mean Mean Squared Error': 0.002929262727152805, 'Mean Average Precision': 0.07515270506108203, 'Mean Brier Score': 0.002929262727152805} | ||
evaluation_metrics = {'Mean Mean Squared Error': 0.0029154554168954083, 'Mean Average Precision': 0.07515270506108203, 'Mean Brier Score': 0.0029154554168954083} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import sys\n", | ||
"from pathlib import Path\n", | ||
"import pandas as pd\n", | ||
"import pickle\n", | ||
"\n", | ||
"from sklearn.ensemble import RandomForestClassifier\n", | ||
"\n", | ||
"from stepshift.views import StepshiftedModels\n", | ||
"from views_runs import DataPartitioner, ViewsRun\n", | ||
"\n", | ||
"PATH = Path.cwd() \n", | ||
"sys.path.insert(0, str(Path(*[i for i in PATH.parts[:PATH.parts.index(\"views_pipeline\")+1]]) / \"common_utils\")) # PATH_COMMON_UTILS\n", | ||
"from set_path import setup_project_paths, setup_artifacts_paths, setup_data_paths\n", | ||
"setup_project_paths(PATH) #adds all necessary paths to sys.path\n", | ||
"\n", | ||
"from config_data_partitions import get_data_partitions #change to common_utils/set_partition.py\n", | ||
"from config_hyperparameters import get_hp_config\n", | ||
"from config_model import get_model_config" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
def train(model_config, hp_config, data_partitions):
    """Train the calibration- and future-partition stepshifted models.

    If pickled models already exist in the artifacts directory they are
    loaded and returned; otherwise both models are trained from the raw
    dataset and persisted as pickles.

    Args:
        model_config: dict with at least "steps" and "depvar" keys,
            forwarded to StepshiftedModels.
        hp_config: dict with at least "n_estimators" and "n_jobs" keys,
            forwarded to RandomForestClassifier.
        data_partitions: dict with "calib_partitioner_dict" and
            "future_partitioner_dict" entries for DataPartitioner.

    Returns:
        Tuple of (model_calibration_partition, model_future_partition),
        both ViewsRun instances.

    Raises:
        RuntimeError: if training completes but either model is missing.
    """
    print("Training...")

    # TODO(review): placeholder — point this at the real artifacts directory
    # (e.g. via setup_artifacts_paths) before running the notebook.
    artifacts_path = Path("your_path_to_artifacts_directory")

    calib_pickle_path = artifacts_path / "model_calibration_partition.pkl"
    future_pickle_path = artifacts_path / "model_future_partition.pkl"

    if calib_pickle_path.exists() and future_pickle_path.exists():
        print("Pickle files already exist. Loading models from pickle files...")
        # NOTE(review): pickle.load is unsafe on untrusted files — acceptable
        # here only because the artifacts directory is project-controlled.
        with open(calib_pickle_path, 'rb') as file:
            model_calibration_partition = pickle.load(file)
        with open(future_pickle_path, 'rb') as file:
            model_future_partition = pickle.load(file)

    else:
        # BUG FIX: the original assigned the path *string* to `dataset`
        # (the "# Load your dataset here" comment was never acted on) and
        # passed that string to ViewsRun.fit, which expects the loaded data.
        # Actually read the parquet file into a DataFrame.
        dataset = pd.read_parquet("models/electric_relaxation/data/raw/raw.parquet")

        calib_partition = DataPartitioner({'calib': data_partitions["calib_partitioner_dict"]})
        future_partition = DataPartitioner({'future': data_partitions["future_partitioner_dict"]})

        base_model = RandomForestClassifier(n_estimators=hp_config["n_estimators"], n_jobs=hp_config["n_jobs"])
        stepshifter_def = StepshiftedModels(base_model, model_config["steps"], model_config["depvar"])

        model_calibration_partition = ViewsRun(calib_partition, stepshifter_def)
        model_calibration_partition.fit('calib', 'train', dataset)

        model_future_partition = ViewsRun(future_partition, stepshifter_def)
        model_future_partition.fit('future', 'train', dataset)

        # Use an explicit check instead of `assert` (asserts are stripped
        # when Python runs with -O, silently skipping this validation).
        if model_calibration_partition is None or model_future_partition is None:
            raise RuntimeError("Model training failed.")

        with open(calib_pickle_path, 'wb') as file:
            pickle.dump(model_calibration_partition, file)
        with open(future_pickle_path, 'wb') as file:
            pickle.dump(model_future_partition, file)

        print("Models trained and saved in artifacts folder!")

    return model_calibration_partition, model_future_partition
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "viewser", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.18" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,9 +6,12 @@ | |
|
||
from sklearn.metrics import mean_squared_error, average_precision_score, roc_auc_score, brier_score_loss | ||
|
||
model_path = Path(__file__).resolve().parents[2] | ||
sys.path.append(str(model_path)) | ||
from configs.config_model import get_model_config | ||
PATH = Path(__file__) | ||
sys.path.insert(0, str(Path(*[i for i in PATH.parts[:PATH.parts.index("views_pipeline")+1]]) / "common_utils")) # PATH_COMMON_UTILS | ||
from set_path import setup_project_paths, setup_artifacts_paths, setup_data_paths | ||
setup_project_paths(PATH) #adds all necessary paths to sys.path | ||
|
||
from config_model import get_model_config | ||
|
||
|
||
def evaluate_model(model_config): | ||
|
@@ -34,7 +37,10 @@ def evaluate_model(model_config): | |
""" | ||
print("Evaluating...") | ||
|
||
df_calib = pd.read_parquet(model_path/"data"/"generated"/"calibration_predictions.parquet") | ||
PATH_MODEL, PATH_RAW, PATH_PROCESSED, PATH_GENERATED = setup_data_paths(PATH) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. With the new implementation, you should not get PATH_MODEL from here. You can get it from setup_model_path(), but given the use below, I think you should just use setup_artifacts_path(). |
||
|
||
#df_calib = pd.read_parquet(model_path/"data"/"generated"/"calibration_predictions.parquet") | ||
df_calib = pd.read_parquet(PATH_GENERATED / "calibration_predictions.parquet") | ||
|
||
steps = model_config["steps"] | ||
depvar = [model_config["depvar"]] #formerly stepcols, changed to depvar to also use in true_values | ||
|
@@ -61,7 +67,7 @@ def evaluate_model(model_config): | |
[row[col] for col in pred_cols]), axis=1) | ||
mean_brier_score = df_calib["brier_score"].mean() | ||
|
||
metrics_dict_path = model_path / "artifacts" / "evaluation_metrics.py" | ||
metrics_dict_path = PATH_MODEL / "artifacts" / "evaluation_metrics.py" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Then you can also simplify this a bit. |
||
|
||
evaluation_metrics_calib = { | ||
"Mean Mean Squared Error": mean_mse, | ||
|
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have kept this as purely a data-path concern. PATH_MODEL can be defined using setup_model_paths().
But given the way it is used below, I think you would be better off just using setup_artifacts_path().
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, I'll push the latest changes to main tomorrow. Just running sweep to see that everything is as it should be after incorporating your last comments. After that you should merge from main to this