Skip to content

Commit

Permalink
Merge branch 'development' into stepshifter
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaolong0728 committed Nov 13, 2024
2 parents 453eccf + 3d67dcb commit ed8615a
Show file tree
Hide file tree
Showing 79 changed files with 444 additions and 201 deletions.
2 changes: 1 addition & 1 deletion common_querysets/queryset_bad_blood.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

def generate():

qs_natsoc = (Queryset('fatalities002_pgm_natsoc','priogrid_month')
qs_natsoc = (Queryset('fatalities003_pgm_natsoc','priogrid_month')

.with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
Expand Down
2 changes: 1 addition & 1 deletion common_querysets/queryset_blank_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

def generate():

qs_natsoc = (Queryset('fatalities002_pgm_natsoc','priogrid_month')
qs_natsoc = (Queryset('fatalities003_pgm_natsoc','priogrid_month')

.with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
Expand Down
2 changes: 1 addition & 1 deletion common_querysets/queryset_brown_cheese.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def generate():
- queryset_base (Queryset): A queryset containing the base data for the model training.
"""

qs_baseline = (Queryset("fatalities002_baseline", "country_month")
qs_baseline = (Queryset("fatalities003_baseline", "country_month")

# target variable
.with_column(Column("ln_ged_sb_dep", from_loa="country_month", from_column="ged_sb_best_sum_nokgi")
Expand Down
2 changes: 1 addition & 1 deletion common_querysets/queryset_caring_fish.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

def generate():

qs_conflict_history = (Queryset('fatalities002_pgm_conflict_history','priogrid_month')
qs_conflict_history = (Queryset('fatalities003_pgm_conflict_history','priogrid_month')

.with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.ops.ln()
Expand Down
2 changes: 1 addition & 1 deletion common_querysets/queryset_chunky_cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

def generate():

qs_conflictlong = (Queryset('fatalities002_pgm_conflictlong','priogrid_month')
qs_conflictlong = (Queryset('fatalities003_pgm_conflictlong','priogrid_month')
.with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
.transform.ops.ln()
Expand Down
2 changes: 1 addition & 1 deletion common_querysets/queryset_dark_paradise.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

def generate():

qs_conflictlong = (Queryset('fatalities002_pgm_conflictlong','priogrid_month')
qs_conflictlong = (Queryset('fatalities003_pgm_conflictlong','priogrid_month')

.with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
Expand Down
8 changes: 4 additions & 4 deletions common_querysets/queryset_invisible_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

def generate():

qs_broad = (Queryset('fatalities002_pgm_broad','priogrid_month')
qs_broad = (Queryset('fatalities003_pgm_broad','priogrid_month')

.with_column(Column('tlag1_dr_mod_gs', from_loa='priogrid_month', from_column='tlag1_dr_mod_gs')
.transform.missing.replace_na(0)
Expand Down Expand Up @@ -52,17 +52,17 @@ def generate():

.with_column(Column('sptime_dist_k1_ged_sb', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
.transform.spatial.sptime_dist(distances,1,1.0,0.0)
.transform.spatial.sptime_dist('distances',1,1.0,0.0)
)

.with_column(Column('sptime_dist_k10_ged_sb', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
.transform.spatial.sptime_dist(distances,1,10.0,0.0)
.transform.spatial.sptime_dist('distances',1,10.0,0.0)
)

.with_column(Column('sptime_dist_k001_ged_sb', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
.transform.spatial.sptime_dist(distances,1,0.01,0.0)
.transform.spatial.sptime_dist('distances',1,0.01,0.0)
)

.with_column(Column('dist_diamsec', from_loa='priogrid', from_column='dist_diamsec_s_wgs')
Expand Down
2 changes: 1 addition & 1 deletion common_querysets/queryset_lavender_haze.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

def generate():

qs_broad = (Queryset('fatalities002_pgm_broad','priogrid_month')
qs_broad = (Queryset('fatalities003_pgm_broad','priogrid_month')

.with_column(Column('tlag1_dr_mod_gs', from_loa='priogrid_month', from_column='tlag1_dr_mod_gs')
.transform.missing.replace_na(0)
Expand Down
2 changes: 1 addition & 1 deletion common_querysets/queryset_midnight_rain.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def generate():
- queryset_base (Queryset): A queryset containing the base data for the model training.
"""

qs_escwa_drought = (Queryset('fatalities002_pgm_escwa_drought','priogrid_month')
qs_escwa_drought = (Queryset('fatalities003_pgm_escwa_drought','priogrid_month')

.with_column(Column('pgd_nlights_calib_mean', from_loa='priogrid_year', from_column='nlights_calib_mean')
.transform.missing.replace_na(0)
Expand Down
2 changes: 1 addition & 1 deletion common_querysets/queryset_old_money.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

def generate():

qs_escwa_drought = (Queryset('fatalities002_pgm_escwa_drought','priogrid_month')
qs_escwa_drought = (Queryset('fatalities003_pgm_escwa_drought','priogrid_month')

.with_column(Column('pgd_nlights_calib_mean', from_loa='priogrid_year', from_column='nlights_calib_mean')
.transform.missing.replace_na(0)
Expand Down
2 changes: 1 addition & 1 deletion common_querysets/queryset_orange_pasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

def generate():

qs_baseline = (Queryset('fatalities002_pgm_baseline','priogrid_month')
qs_baseline = (Queryset('fatalities003_pgm_baseline','priogrid_month')

.with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.missing.replace_na()
Expand Down
2 changes: 1 addition & 1 deletion common_querysets/queryset_wildest_dream.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
def generate():


qs_sptime_dist = (Queryset('fatalities002_pgm_conflict_sptime_dist','priogrid_month')
qs_sptime_dist = (Queryset('fatalities003_pgm_conflict_sptime_dist','priogrid_month')

.with_column(Column('ged_gte_1', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.bool.gte(1)
Expand Down
2 changes: 1 addition & 1 deletion common_querysets/queryset_yellow_pikachu.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

def generate():

qs_treelag = (Queryset('fatalities002_pgm_conflict_treelag','priogrid_month')
qs_treelag = (Queryset('fatalities003_pgm_conflict_treelag','priogrid_month')

.with_column(Column('ged_gte_1', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
.transform.bool.gte(1)
Expand Down
20 changes: 10 additions & 10 deletions common_utils/model_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,24 +178,24 @@ def get_model_name_from_path(path: Union[Path, str]) -> str:
ValueError: If the model name is not found in the provided path.
"""
path = Path(path)
logger.info(f"Extracting model name from Path: {path}")
logger.debug(f"Extracting model name from Path: {path}")
if "models" in path.parts and "ensembles" not in path.parts:
model_idx = path.parts.index("models")
model_name = path.parts[model_idx + 1]
if utils_model_naming.validate_model_name(model_name):
logger.info(f"Valid model name {model_name} found in path {path}")
logger.debug(f"Valid model name {model_name} found in path {path}")
return str(model_name)
else:
logger.info(f"No valid model name found in path {path}")
logger.debug(f"No valid model name found in path {path}")
return None
if "ensembles" in path.parts and "models" not in path.parts:
model_idx = path.parts.index("ensembles")
model_name = path.parts[model_idx + 1]
if utils_model_naming.validate_model_name(model_name):
logger.info(f"Valid ensemble name {model_name} found in path {path}")
logger.debug(f"Valid ensemble name {model_name} found in path {path}")
return str(model_name)
else:
logger.info(f"No valid ensemble name found in path {path}")
logger.debug(f"No valid ensemble name found in path {path}")
return None
return None

Expand Down Expand Up @@ -319,7 +319,7 @@ def _handle_global_cache(self) -> None:

cached_instance = GlobalCache[self._instance_hash]
if cached_instance and not self._force_cache_overwrite:
logger.info(
logger.debug(
f"ModelPath instance {self.model_name} found in GlobalCache. Using cached instance."
)
return cached_instance
Expand All @@ -337,13 +337,13 @@ def _write_to_global_cache(self) -> None:
from global_cache import GlobalCache

if GlobalCache[self._instance_hash] is None:
logger.info(
logger.debug(
f"Writing {self.target.title}Path object to cache for model {self.model_name}."
)
GlobalCache[self._instance_hash] = self
else:
if self._force_cache_overwrite:
logger.info(
logger.debug(
f"Overwriting {self.target.title}Path object in cache for model {self.model_name}. (_force_cache_overwrite is set to True)"
)
GlobalCache[self._instance_hash] = self
Expand Down Expand Up @@ -481,7 +481,7 @@ def get_queryset(self) -> Optional[Dict[str, str]]:
logger.error(f"Error importing queryset: {e}")
self._queryset = None
else:
logger.info(f"Queryset {self.queryset_path} imported successfully.")
logger.debug(f"Queryset {self.queryset_path} imported successfully.")
return self._queryset.generate() if self._queryset else None
else:
logger.warning(
Expand Down Expand Up @@ -569,7 +569,7 @@ def add_paths_to_sys(self) -> List[str]:
)
return
if model_name == self.model_name:
logger.info(
logger.debug(
f"Path {str(path)} for '{model_name}' is already added to sys.path. Skipping..."
)
if self._sys_paths is None:
Expand Down
1 change: 1 addition & 0 deletions documentation/catalogs/cm_model_catalog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
| Model Name | Algorithm | Target | Input Features | Non-default Hyperparameters | Forecasting Type | Implementation Status | Implementation Date | Author |
| ---------- | --------- | ------ | -------------- | --------------------------- | ---------------- | --------------------- | ------------------- | ------ |
| electric_relaxation | RandomForestClassifier | ged_sb_dep | - [escwa001_cflong](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_electric_relaxation.py) | - [hyperparameters electric_relaxation](https://github.com/prio-data/views_pipeline/blob/main/models/electric_relaxation/configs/config_hyperparameters.py) | None | shadow | NA | Sara |
| brown_cheese | XGBModel | ln_ged_sb_dep | - [fatalities002_baseline](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_brown_cheese.py) | - [hyperparameters brown_cheese](https://github.com/prio-data/views_pipeline/blob/main/models/brown_cheese/configs/config_hyperparameters.py) | None | shadow | NA | Borbála |
18 changes: 12 additions & 6 deletions documentation/catalogs/pgm_model_catalog.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
| Model Name | Algorithm | Target | Input Features | Non-default Hyperparameters | Forecasting Type | Implementation Status | Implementation Date | Author |
| ---------- | --------- | ------ | -------------- | --------------------------- | ---------------- | --------------------- | ------------------- | ------ |
| wildest_dream | HurdleRegression | ln_ged_sb_dep | - [fatalities003_pgm_conflict_sptime_dist](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_wildest_dream.py) | - [hyperparameters wildest_dream](https://github.com/prio-data/views_pipeline/blob/main/models/wildest_dream/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| old_money | HurdleRegression | ln_ged_sb_dep | - [fatalities003_pgm_escwa_drought](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_old_money.py) | | None | | NA | Xiaolong |
| chunky_cat | LightGBMModel | ln_ged_sb_dep | - [fatalities002_pgm_conflictlong](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_chunky_cat.py) | - [hyperparameters chunky_cat](https://github.com/prio-data/views_pipeline/blob/main/models/chunky_cat/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| wildest_dream | HurdleRegression | ln_ged_sb_dep | - [fatalities002_pgm_conflict_sptime_dist](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_wildest_dream.py) | - [hyperparameters wildest_dream](https://github.com/prio-data/views_pipeline/blob/main/models/wildest_dream/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| dark_paradise | HurdleModel | ln_ged_sb_dep | - [fatalities002_pgm_conflictlong](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_dark_paradise.py) | - [hyperparameters dark_paradise](https://github.com/prio-data/views_pipeline/blob/main/models/dark_paradise/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| midnight_rain | LightGBMModel | ln_ged_sb_dep | - [fatalities002_pgm_escwa_drought](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_midnight_rain.py) | - [hyperparameters midnight_rain](https://github.com/prio-data/views_pipeline/blob/main/models/midnight_rain/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| invisible_string | LightGBMModel | ln_ged_sb_dep | - [fatalities002_pgm_broad](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_invisible_string.py) | - [hyperparameters invisible_string](https://github.com/prio-data/views_pipeline/blob/main/models/invisible_string/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| old_money | HurdleModel | ln_ged_sb_dep | - [fatalities002_pgm_escwa_drought](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_old_money.py) | - [hyperparameters old_money](https://github.com/prio-data/views_pipeline/blob/main/models/old_money/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| purple_alien | HydraNet | ln_sb_best, ln_ns_best, ln_os_best, ln_sb_best_binarized, ln_ns_best_binarized, ln_os_best_binarized | - [escwa001_cflong](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_purple_alien.py) | - [hyperparameters purple_alien](https://github.com/prio-data/views_pipeline/blob/main/models/purple_alien/configs/config_hyperparameters.py) | None | shadow | NA | Simon |
| orange_pasta | LightGBMModel | ged_sb_dep | - [fatalities003_pgm_baseline](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_orange_pasta.py) | - [hyperparameters orange_pasta](https://github.com/prio-data/views_pipeline/blob/main/models/orange_pasta/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| blank_space | HurdleRegression | ln_ged_sb_dep | - [fatalities003_pgm_natsoc](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_blank_space.py) | - [hyperparameters blank_space](https://github.com/prio-data/views_pipeline/blob/main/models/blank_space/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| yellow_pikachu | XGBRegressor | ged_sb_dep | - [fatalities003_pgm_conflict_treelag](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_yellow_pikachu.py) | - [hyperparameters yellow_pikachu](https://github.com/prio-data/views_pipeline/blob/main/models/yellow_pikachu/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| lavender_haze | HurdleRegression | ln_ged_sb_dep | - [fatalities003_pgm_broad](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_lavender_haze.py) | - [hyperparameters lavender_haze](https://github.com/prio-data/views_pipeline/blob/main/models/lavender_haze/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| orange_pasta | LightGBMModel | ln_ged_sb_dep | - [fatalities002_pgm_baseline](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_orange_pasta.py) | - [hyperparameters orange_pasta](https://github.com/prio-data/views_pipeline/blob/main/models/orange_pasta/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| blank_space | HurdleRegression | ln_ged_sb_dep | - [fatalities002_pgm_natsoc](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_blank_space.py) | - [hyperparameters blank_space](https://github.com/prio-data/views_pipeline/blob/main/models/blank_space/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| bad_blood | LightGBMModel | ln_ged_sb_dep | - [fatalities002_pgm_natsoc](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_bad_blood.py) | - [hyperparameters bad_blood](https://github.com/prio-data/views_pipeline/blob/main/models/bad_blood/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| caring_fish | XGBModel | ln_ged_sb_dep | - [fatalities002_pgm_conflict_history](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_caring_fish.py) | - [hyperparameters caring_fish](https://github.com/prio-data/views_pipeline/blob/main/models/caring_fish/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| yellow_pikachu | HurdleRegression | ln_ged_sb_dep | - [fatalities002_pgm_conflict_treelag](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_yellow_pikachu.py) | - [hyperparameters yellow_pikachu](https://github.com/prio-data/views_pipeline/blob/main/models/yellow_pikachu/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
| lavender_haze | HurdleRegression | ln_ged_sb_dep | - [fatalities002_pgm_broad](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_lavender_haze.py) | - [hyperparameters lavender_haze](https://github.com/prio-data/views_pipeline/blob/main/models/lavender_haze/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
4 changes: 0 additions & 4 deletions ensembles/cruel_summer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@
## Overview
This folder contains code for the Cruel Summer model, an ensemble machine learning model designed for predicting fatalities.

The model utilizes **latest** Lavender Haze (Hurdle Model LGBMClassifier+LGBMRegressor), **latest** Blank Space
(Hurdle Model LGBMClassifier+LGBMRegressor) and **latest** Wildest Dream (Hurdle Model XGBClassifier+XGBRegressor)
for its predictions and is on pgm level of analysis.

The model uses log fatalities.

## Repository Structure
Expand Down
28 changes: 16 additions & 12 deletions ensembles/cruel_summer/configs/config_deployment.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
def get_deployment_config():
"""
Deployment Configuration Script
This script defines the deployment configuration settings for the application.
It includes the deployment status and any additional settings specified.
"""
Contains the configuration for deploying the model into different environments.
This configuration is "behavioral" so modifying it will affect the model's runtime behavior and integration into the deployment system.
Deployment Status:
- shadow: The deployment is shadowed and not yet active.
- deployed: The deployment is active and in use.
- baseline: The deployment is in a baseline state, for reference or comparison.
- deprecated: The deployment is deprecated and no longer supported.
Returns:
- deployment_config (dict): A dictionary containing deployment settings, determining how the model is deployed, including status, endpoints, and resource allocation.
"""
Additional settings can be included in the configuration dictionary as needed.
# More deployment settings can/will be added here
deployment_config = {
"deployment_status": "shadow", # shadow, deployed, baseline, or deprecated
}
"""

return deployment_config
def get_deployment_config():
# Deployment settings
deployment_config = {'deployment_status': 'shadow'}
return deployment_config
2 changes: 1 addition & 1 deletion ensembles/cruel_summer/configs/config_hyperparameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ def get_hp_config():
hp_config = {
"steps": [*range(1, 36 + 1, 1)]
}
return hp_config
return hp_config
Loading

0 comments on commit ed8615a

Please sign in to comment.