Commit

merging pgm_models into cm_models
lujzi05 committed Nov 8, 2024
2 parents 86cf1c6 + 8273771 commit b2c2605
Showing 129 changed files with 261 additions and 204 deletions.
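
Most of the queryset changes below are the same mechanical version bump, renaming querysets from the fatalities002 prefix to fatalities003. A minimal sketch of how such a bulk rename could be scripted; the script is illustrative and not part of this commit:

```python
from pathlib import Path

# Hypothetical helper, not from this commit: bump the queryset version
# prefix in every module under common_querysets/.
OLD, NEW = "fatalities002_pgm", "fatalities003_pgm"

for path in Path("common_querysets").glob("queryset_*.py"):
    text = path.read_text()
    if OLD in text:
        path.write_text(text.replace(OLD, NEW))
        print(f"updated {path}")
```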
2 changes: 1 addition & 1 deletion common_querysets/queryset_bad_blood.py
@@ -2,7 +2,7 @@

def generate():

-qs_natsoc = (Queryset('fatalities002_pgm_natsoc','priogrid_month')
+qs_natsoc = (Queryset('fatalities003_pgm_natsoc','priogrid_month')

.with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
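For reference, each of these queryset modules follows the same viewser pattern. A reconstructed sketch of the full generate() in queryset_bad_blood.py after this commit; only the lines visible in the diff are certain, while the import, the ln() transform, and the return statement are assumed from the sibling modules:

```python
from viewser import Queryset, Column

def generate():
    qs_natsoc = (Queryset('fatalities003_pgm_natsoc', 'priogrid_month')

        .with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month',
                            from_column='ged_sb_best_sum_nokgi')
            .transform.missing.replace_na()
            .transform.ops.ln()  # assumed: log transform, as in queryset_caring_fish
        )
    )
    return qs_natsoc
```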
2 changes: 1 addition & 1 deletion common_querysets/queryset_blank_space.py
@@ -2,7 +2,7 @@

def generate():

-qs_natsoc = (Queryset('fatalities002_pgm_natsoc','priogrid_month')
+qs_natsoc = (Queryset('fatalities003_pgm_natsoc','priogrid_month')

.with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
2 changes: 1 addition & 1 deletion common_querysets/queryset_caring_fish.py
@@ -2,7 +2,7 @@

def generate():

-qs_conflict_history = (Queryset('fatalities002_pgm_conflict_history','priogrid_month')
+qs_conflict_history = (Queryset('fatalities003_pgm_conflict_history','priogrid_month')

.with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.ops.ln()
2 changes: 1 addition & 1 deletion common_querysets/queryset_chunky_cat.py
@@ -2,7 +2,7 @@

def generate():

-qs_conflictlong = (Queryset('fatalities002_pgm_conflictlong','priogrid_month')
+qs_conflictlong = (Queryset('fatalities003_pgm_conflictlong','priogrid_month')
.with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
.transform.ops.ln()
2 changes: 1 addition & 1 deletion common_querysets/queryset_dark_paradise.py
@@ -2,7 +2,7 @@

def generate():

-qs_conflictlong = (Queryset('fatalities002_pgm_conflictlong','priogrid_month')
+qs_conflictlong = (Queryset('fatalities003_pgm_conflictlong','priogrid_month')

.with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
8 changes: 4 additions & 4 deletions common_querysets/queryset_invisible_string.py
@@ -2,7 +2,7 @@

def generate():

-qs_broad = (Queryset('fatalities002_pgm_broad','priogrid_month')
+qs_broad = (Queryset('fatalities003_pgm_broad','priogrid_month')

.with_column(Column('tlag1_dr_mod_gs', from_loa='priogrid_month', from_column='tlag1_dr_mod_gs')
.transform.missing.replace_na(0)
@@ -52,17 +52,17 @@ def generate():

.with_column(Column('sptime_dist_k1_ged_sb', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
-.transform.spatial.sptime_dist(distances,1,1.0,0.0)
+.transform.spatial.sptime_dist('distances',1,1.0,0.0)
)

.with_column(Column('sptime_dist_k10_ged_sb', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
-.transform.spatial.sptime_dist(distances,1,10.0,0.0)
+.transform.spatial.sptime_dist('distances',1,10.0,0.0)
)

.with_column(Column('sptime_dist_k001_ged_sb', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
-.transform.spatial.sptime_dist(distances,1,0.01,0.0)
+.transform.spatial.sptime_dist('distances',1,0.01,0.0)
)

.with_column(Column('dist_diamsec', from_loa='priogrid', from_column='dist_diamsec_s_wgs')
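The sptime_dist edits above are a genuine fix rather than a rename: a bare distances would be evaluated as a Python identifier, which would typically raise NameError when the module is imported, while the quoted 'distances' is passed to the transform as a string argument. A self-contained illustration of one corrected column, under the same viewser-import assumption as the sketch above:

```python
from viewser import Column

# The first argument to sptime_dist is now the string 'distances';
# the numeric arguments (1, 1.0, 0.0) are taken verbatim from the diff.
col = (Column('sptime_dist_k1_ged_sb', from_loa='priogrid_month',
              from_column='ged_sb_best_sum_nokgi')
       .transform.missing.replace_na()
       .transform.spatial.sptime_dist('distances', 1, 1.0, 0.0))
```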
2 changes: 1 addition & 1 deletion common_querysets/queryset_lavender_haze.py
@@ -2,7 +2,7 @@

def generate():

-qs_broad = (Queryset('fatalities002_pgm_broad','priogrid_month')
+qs_broad = (Queryset('fatalities003_pgm_broad','priogrid_month')

.with_column(Column('tlag1_dr_mod_gs', from_loa='priogrid_month', from_column='tlag1_dr_mod_gs')
.transform.missing.replace_na(0)
2 changes: 1 addition & 1 deletion common_querysets/queryset_midnight_rain.py
@@ -10,7 +10,7 @@ def generate():
- queryset_base (Queryset): A queryset containing the base data for the model training.
"""

-qs_escwa_drought = (Queryset('fatalities002_pgm_escwa_drought','priogrid_month')
+qs_escwa_drought = (Queryset('fatalities003_pgm_escwa_drought','priogrid_month')

.with_column(Column('pgd_nlights_calib_mean', from_loa='priogrid_year', from_column='nlights_calib_mean')
.transform.missing.replace_na(0)
2 changes: 1 addition & 1 deletion common_querysets/queryset_old_money.py
@@ -2,7 +2,7 @@

def generate():

-qs_escwa_drought = (Queryset('fatalities002_pgm_escwa_drought','priogrid_month')
+qs_escwa_drought = (Queryset('fatalities003_pgm_escwa_drought','priogrid_month')

.with_column(Column('pgd_nlights_calib_mean', from_loa='priogrid_year', from_column='nlights_calib_mean')
.transform.missing.replace_na(0)
2 changes: 1 addition & 1 deletion common_querysets/queryset_orange_pasta.py
@@ -2,7 +2,7 @@

def generate():

-qs_baseline = (Queryset('fatalities002_pgm_baseline','priogrid_month')
+qs_baseline = (Queryset('fatalities003_pgm_baseline','priogrid_month')

.with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
2 changes: 1 addition & 1 deletion common_querysets/queryset_wildest_dream.py
@@ -3,7 +3,7 @@
def generate():


-qs_sptime_dist = (Queryset('fatalities002_pgm_conflict_sptime_dist','priogrid_month')
+qs_sptime_dist = (Queryset('fatalities003_pgm_conflict_sptime_dist','priogrid_month')

.with_column(Column('ged_gte_1', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.bool.gte(1)
2 changes: 1 addition & 1 deletion common_querysets/queryset_yellow_pikachu.py
@@ -2,7 +2,7 @@

def generate():

-qs_treelag = (Queryset('fatalities002_pgm_conflict_treelag','priogrid_month')
+qs_treelag = (Queryset('fatalities003_pgm_conflict_treelag','priogrid_month')

.with_column(Column('ged_gte_1', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.bool.gte(1)
20 changes: 10 additions & 10 deletions common_utils/model_path.py
@@ -178,24 +178,24 @@ def get_model_name_from_path(path: Union[Path, str]) -> str:
ValueError: If the model name is not found in the provided path.
"""
path = Path(path)
logger.info(f"Extracting model name from Path: {path}")
logger.debug(f"Extracting model name from Path: {path}")
if "models" in path.parts and "ensembles" not in path.parts:
model_idx = path.parts.index("models")
model_name = path.parts[model_idx + 1]
if utils_model_naming.validate_model_name(model_name):
logger.info(f"Valid model name {model_name} found in path {path}")
logger.debug(f"Valid model name {model_name} found in path {path}")
return str(model_name)
else:
logger.info(f"No valid model name found in path {path}")
logger.debug(f"No valid model name found in path {path}")
return None
if "ensembles" in path.parts and "models" not in path.parts:
model_idx = path.parts.index("ensembles")
model_name = path.parts[model_idx + 1]
if utils_model_naming.validate_model_name(model_name):
logger.info(f"Valid ensemble name {model_name} found in path {path}")
logger.debug(f"Valid ensemble name {model_name} found in path {path}")
return str(model_name)
else:
logger.info(f"No valid ensemble name found in path {path}")
logger.debug(f"No valid ensemble name found in path {path}")
return None
return None

@@ -319,7 +319,7 @@ def _handle_global_cache(self) -> None:

cached_instance = GlobalCache[self._instance_hash]
if cached_instance and not self._force_cache_overwrite:
-logger.info(
+logger.debug(
f"ModelPath instance {self.model_name} found in GlobalCache. Using cached instance."
)
return cached_instance
@@ -337,13 +337,13 @@ def _write_to_global_cache(self) -> None:
from global_cache import GlobalCache

if GlobalCache[self._instance_hash] is None:
-logger.info(
+logger.debug(
f"Writing {self.target.title}Path object to cache for model {self.model_name}."
)
GlobalCache[self._instance_hash] = self
else:
if self._force_cache_overwrite:
-logger.info(
+logger.debug(
f"Overwriting {self.target.title}Path object in cache for model {self.model_name}. (_force_cache_overwrite is set to True)"
)
GlobalCache[self._instance_hash] = self
@@ -481,7 +481,7 @@ def get_queryset(self) -> Optional[Dict[str, str]]:
logger.error(f"Error importing queryset: {e}")
self._queryset = None
else:
logger.info(f"Queryset {self.queryset_path} imported successfully.")
logger.debug(f"Queryset {self.queryset_path} imported successfully.")
return self._queryset.generate() if self._queryset else None
else:
logger.warning(
@@ -569,7 +569,7 @@ def add_paths_to_sys(self) -> List[str]:
)
return
if model_name == self.model_name:
-logger.info(
+logger.debug(
f"Path {str(path)} for '{model_name}' is already added to sys.path. Skipping..."
)
if self._sys_paths is None:
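The model_path.py changes uniformly demote routine path and cache messages from info to debug, so they disappear at the default log level. A minimal standard-library sketch of the effect; the logger name and configuration are illustrative:

```python
import logging

logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
logger = logging.getLogger("model_path")

logger.info("Still visible at the default INFO level.")
logger.debug("Extracting model name from Path: ...")  # suppressed after this commit

# Opt back in to the verbose messages when troubleshooting:
logger.setLevel(logging.DEBUG)
logger.debug("Valid model name found in path ...")  # now emitted
```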
4 changes: 0 additions & 4 deletions ensembles/cruel_summer/README.md
@@ -2,10 +2,6 @@
## Overview
This folder contains code for Cruel Summer model, an ensemble machine learning model designed for predicting fatalities.

-The model utilizes **latest** Lavender Haze (Hurdle Model LGBMClassifier+LGBMRegressor), **latest** Blank Space
-(Hurdle Model LGBMClassifier+LGBMRegressor) and **latest** Wildest Dream (Hurdle Model XGBClassifier+XGBRegressor)
-for its predictions and is on pgm level of analysis.
-
The model uses log fatalities.

## Repository Structure
28 changes: 16 additions & 12 deletions ensembles/cruel_summer/configs/config_deployment.py
@@ -1,16 +1,20 @@
def get_deployment_config():
    """
-    Deployment Configuration Script
-    This script defines the deployment configuration settings for the application.
-    It includes the deployment status and any additional settings specified.
-    """
+    Contains the configuration for deploying the model into different environments.
+    This configuration is "behavioral" so modifying it will affect the model's runtime behavior and integration into the deployment system.
+
+    Deployment Status:
+    - shadow: The deployment is shadowed and not yet active.
+    - deployed: The deployment is active and in use.
+    - baseline: The deployment is in a baseline state, for reference or comparison.
+    - deprecated: The deployment is deprecated and no longer supported.
+
+    Returns:
+    - deployment_config (dict): A dictionary containing deployment settings, determining how the model is deployed, including status, endpoints, and resource allocation.
+    """
-    Additional settings can be included in the configuration dictionary as needed.
+    # More deployment settings can/will be added here
+    deployment_config = {
+        "deployment_status": "shadow", # shadow, deployed, baseline, or deprecated
+    }
-    """

    return deployment_config
-def get_deployment_config():
-    # Deployment settings
-    deployment_config = {'deployment_status': 'shadow'}
-    return deployment_config
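
A short usage sketch for the reworked config; the branching caller is hypothetical, and only get_deployment_config and the four documented status values come from the file above:

```python
status = get_deployment_config()["deployment_status"]

if status == "deprecated":
    raise RuntimeError("Model is deprecated and should not be run.")
serve_externally = status == "deployed"  # shadow/baseline runs stay internal
```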
2 changes: 1 addition & 1 deletion ensembles/cruel_summer/configs/config_hyperparameters.py
@@ -2,4 +2,4 @@ def get_hp_config():
hp_config = {
"steps": [*range(1, 36 + 1, 1)]
}
-return hp_config
\ No newline at end of file
+return hp_config
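
The splat-range idiom above defines the forecast horizons; a quick check of what it expands to (reading the 36 steps as months ahead is an assumption based on the pgm models' monthly resolution):

```python
steps = [*range(1, 36 + 1, 1)]
assert steps == list(range(1, 37))
print(len(steps), steps[0], steps[-1])  # 36 horizons: 1 through 36
```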
12 changes: 6 additions & 6 deletions ensembles/cruel_summer/configs/config_meta.py
@@ -8,10 +8,10 @@ def get_meta_config():
"""
meta_config = {
"name": "cruel_summer",
"models": ["lavender_haze", "blank_space", "wildest_dream"],
"depvar": "ln_ged_sb_dep", # Double-check the target variables of each model
"level": "pgm",
"aggregation": "median",
"creator": "Xiaolong"
"models": ["chunky_cat", "bad_blood"],
"depvar": "ln_ged_sb_dep",
"level": "pgm",
"aggregation": "median",
"creator": "Xiaolong"
}
-return meta_config
\ No newline at end of file
+return meta_config
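
Downstream code consumes this dictionary to assemble the ensemble; a hypothetical sketch of how the constituent-model list is typically iterated (the loop is illustrative, not from this commit):

```python
meta = get_meta_config()

# After this commit the ensemble aggregates chunky_cat and bad_blood
# (previously lavender_haze, blank_space, and wildest_dream).
for model_name in meta["models"]:
    print(f"collect {meta['depvar']} predictions from {model_name} ({meta['level']})")
print(f"combine with aggregation: {meta['aggregation']}")
```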
14 changes: 14 additions & 0 deletions ensembles/cruel_summer/src/dataloaders/get_data.py
@@ -0,0 +1,14 @@
+import logging
+from model_path import ModelPath
+from utils_dataloaders import fetch_or_load_views_df
+
+logger = logging.getLogger(__name__)
+
+def get_data(model_name, run_type, use_saved, self_test):
+    model_path = ModelPath(model_name, validate=False)
+    path_raw = model_path.data_raw
+
+    data, alerts = fetch_or_load_views_df(model_name, run_type, path_raw, self_test, use_saved)
+    logger.debug(f"DataFrame shape: {data.shape if data is not None else 'None'}")
+
+    return data
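
A hypothetical invocation of the new loader; the argument values are illustrative, and fetch_or_load_views_df / ModelPath come from the pipeline's shared utilities:

```python
# Illustrative call only; run_type and the flags are not prescribed by this file.
df = get_data(model_name="cruel_summer", run_type="forecasting",
              use_saved=True, self_test=False)
print(df.shape if df is not None else "no data returned")
```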
11 changes: 4 additions & 7 deletions ensembles/cruel_summer/src/forecasting/generate_forecast.py
@@ -5,7 +5,6 @@
from pathlib import Path
from model_path import ModelPath
from ensemble_path import EnsemblePath
-from set_partition import get_partitioner_dict
from utils_log_files import create_log_file, read_log_file
from utils_save_outputs import save_predictions
from utils_run import get_standardized_df, get_aggregated_df, get_single_model_config
@@ -50,14 +49,13 @@ def forecast_ensemble(config):
except FileNotFoundError:
logger.exception(f"Model artifact not found at {path_artifact}")

-partition = get_partitioner_dict(run_type)["predict"]
-df = stepshift_model.future_point_predict(partition[0]-1, df_viewser, keep_specific=True)
+df = stepshift_model.predict(run_type, df_viewser)
df = get_standardized_df(df, model_config)

data_generation_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-date_fetch_timestamp = read_log_file(path_raw / f"{run_type}_data_fetch_log.txt").get("Data Fetch Timestamp", None)
+data_fetch_timestamp = read_log_file(path_raw / f"{run_type}_data_fetch_log.txt").get("Data Fetch Timestamp", None)
save_predictions(df, path_generated, model_config)
-create_log_file(path_generated, model_config, ts, data_generation_timestamp, date_fetch_timestamp)
+create_log_file(path_generated, model_config, ts, data_generation_timestamp, data_fetch_timestamp)

dfs.append(df)

@@ -69,7 +67,6 @@ def forecast_ensemble(config):
save_predictions(df_prediction, path_generated_e, config)

# How to define an ensemble model timestamp? Currently set as data_generation_timestamp.
-
-create_log_file(path_generated_e, config, data_generation_timestamp, data_generation_timestamp, date_fetch_timestamp=None,
+create_log_file(path_generated_e, config, data_generation_timestamp, data_generation_timestamp, data_fetch_timestamp=None,
model_type="ensemble", models=config["models"])

@@ -53,16 +53,16 @@ def evaluate_ensemble(config):
except FileNotFoundError:
logger.exception(f"Model artifact not found at {path_artifact}")

df = stepshift_model.predict(run_type, "predict", df_viewser)
df = stepshift_model.predict(run_type, df_viewser)
df = get_standardized_df(df, model_config)
data_generation_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-date_fetch_timestamp = read_log_file(path_raw / f"{run_type}_data_fetch_log.txt").get("Data Fetch Timestamp", None)
+data_fetch_timestamp = read_log_file(path_raw / f"{run_type}_data_fetch_log.txt").get("Data Fetch Timestamp", None)

_, df_output = generate_output_dict(df, model_config)
evaluation, df_evaluation = generate_metric_dict(df, model_config)
save_model_outputs(df_evaluation, df_output, path_generated, model_config)
save_predictions(df, path_generated, model_config)
-create_log_file(path_generated, model_config, ts, data_generation_timestamp, date_fetch_timestamp)
+create_log_file(path_generated, model_config, ts, data_generation_timestamp, data_fetch_timestamp)

dfs.append(df)

[Diff truncated: the remaining changed files are not shown.]
