Merge branch 'development' into stepshifter

prio-data · Nov 13, 2024 · ed8615a · ed8615a
2 parents 453eccf + 3d67dcb
commit ed8615a
Show file tree

Hide file tree

Showing 79 changed files with 444 additions and 201 deletions.
diff --git a/common_querysets/queryset_bad_blood.py b/common_querysets/queryset_bad_blood.py
@@ -2,7 +2,7 @@
 
 def generate():
 
-    qs_natsoc = (Queryset('fatalities002_pgm_natsoc','priogrid_month')
+    qs_natsoc = (Queryset('fatalities003_pgm_natsoc','priogrid_month')
 
                 .with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
                     .transform.missing.replace_na()

diff --git a/common_querysets/queryset_blank_space.py b/common_querysets/queryset_blank_space.py
@@ -2,7 +2,7 @@
 
 def generate():
 
-    qs_natsoc = (Queryset('fatalities002_pgm_natsoc','priogrid_month')
+    qs_natsoc = (Queryset('fatalities003_pgm_natsoc','priogrid_month')
 
                 .with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
                     .transform.missing.replace_na()

diff --git a/common_querysets/queryset_brown_cheese.py b/common_querysets/queryset_brown_cheese.py
@@ -10,7 +10,7 @@ def generate():
     - queryset_base (Queryset): A queryset containing the base data for the model training.
     """
 
-    qs_baseline = (Queryset("fatalities002_baseline", "country_month")
+    qs_baseline = (Queryset("fatalities003_baseline", "country_month")
 
                # target variable
                .with_column(Column("ln_ged_sb_dep", from_loa="country_month", from_column="ged_sb_best_sum_nokgi")

diff --git a/common_querysets/queryset_caring_fish.py b/common_querysets/queryset_caring_fish.py
@@ -2,7 +2,7 @@
 
 def generate():
 
-    qs_conflict_history = (Queryset('fatalities002_pgm_conflict_history','priogrid_month')
+    qs_conflict_history = (Queryset('fatalities003_pgm_conflict_history','priogrid_month')
 
                         .with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
                             .transform.ops.ln()

diff --git a/common_querysets/queryset_chunky_cat.py b/common_querysets/queryset_chunky_cat.py
@@ -2,7 +2,7 @@
 
 def generate():
 
-    qs_conflictlong = (Queryset('fatalities002_pgm_conflictlong','priogrid_month')
+    qs_conflictlong = (Queryset('fatalities003_pgm_conflictlong','priogrid_month')
         .with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
             .transform.missing.replace_na()
             .transform.ops.ln()

diff --git a/common_querysets/queryset_dark_paradise.py b/common_querysets/queryset_dark_paradise.py
@@ -2,7 +2,7 @@
 
 def generate():
 
-    qs_conflictlong = (Queryset('fatalities002_pgm_conflictlong','priogrid_month')
+    qs_conflictlong = (Queryset('fatalities003_pgm_conflictlong','priogrid_month')
 
         .with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
             .transform.missing.replace_na()

diff --git a/common_querysets/queryset_invisible_string.py b/common_querysets/queryset_invisible_string.py
@@ -2,7 +2,7 @@
 
 def generate():
 
-    qs_broad = (Queryset('fatalities002_pgm_broad','priogrid_month')
+    qs_broad = (Queryset('fatalities003_pgm_broad','priogrid_month')
 
               .with_column(Column('tlag1_dr_mod_gs', from_loa='priogrid_month', from_column='tlag1_dr_mod_gs')
                      .transform.missing.replace_na(0)
@@ -52,17 +52,17 @@ def generate():
 
               .with_column(Column('sptime_dist_k1_ged_sb', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
                      .transform.missing.replace_na()
-                     .transform.spatial.sptime_dist(distances,1,1.0,0.0)
+                     .transform.spatial.sptime_dist('distances',1,1.0,0.0)
                      )
 
               .with_column(Column('sptime_dist_k10_ged_sb', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
                      .transform.missing.replace_na()
-                     .transform.spatial.sptime_dist(distances,1,10.0,0.0)
+                     .transform.spatial.sptime_dist('distances',1,10.0,0.0)
                      )
 
               .with_column(Column('sptime_dist_k001_ged_sb', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
                      .transform.missing.replace_na()
-                     .transform.spatial.sptime_dist(distances,1,0.01,0.0)
+                     .transform.spatial.sptime_dist('distances',1,0.01,0.0)
                      )
 
               .with_column(Column('dist_diamsec', from_loa='priogrid', from_column='dist_diamsec_s_wgs')

diff --git a/common_querysets/queryset_lavender_haze.py b/common_querysets/queryset_lavender_haze.py
@@ -2,7 +2,7 @@
 
 def generate():
 
-    qs_broad = (Queryset('fatalities002_pgm_broad','priogrid_month')
+    qs_broad = (Queryset('fatalities003_pgm_broad','priogrid_month')
 
               .with_column(Column('tlag1_dr_mod_gs', from_loa='priogrid_month', from_column='tlag1_dr_mod_gs')
                      .transform.missing.replace_na(0)

diff --git a/common_querysets/queryset_midnight_rain.py b/common_querysets/queryset_midnight_rain.py
@@ -10,7 +10,7 @@ def generate():
     - queryset_base (Queryset): A queryset containing the base data for the model training.
     """
 
-    qs_escwa_drought = (Queryset('fatalities002_pgm_escwa_drought','priogrid_month')
+    qs_escwa_drought = (Queryset('fatalities003_pgm_escwa_drought','priogrid_month')
 
               .with_column(Column('pgd_nlights_calib_mean', from_loa='priogrid_year', from_column='nlights_calib_mean')
                      .transform.missing.replace_na(0)

diff --git a/common_querysets/queryset_old_money.py b/common_querysets/queryset_old_money.py
@@ -2,7 +2,7 @@
 
 def generate():
 
-    qs_escwa_drought = (Queryset('fatalities002_pgm_escwa_drought','priogrid_month')
+    qs_escwa_drought = (Queryset('fatalities003_pgm_escwa_drought','priogrid_month')
 
               .with_column(Column('pgd_nlights_calib_mean', from_loa='priogrid_year', from_column='nlights_calib_mean')
                      .transform.missing.replace_na(0)

diff --git a/common_querysets/queryset_orange_pasta.py b/common_querysets/queryset_orange_pasta.py
@@ -2,7 +2,7 @@
 
 def generate():
 
-    qs_baseline = (Queryset('fatalities002_pgm_baseline','priogrid_month')
+    qs_baseline = (Queryset('fatalities003_pgm_baseline','priogrid_month')
 
                 .with_column(Column('ln_ged_sb_dep', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
                     .transform.missing.replace_na()

diff --git a/common_querysets/queryset_wildest_dream.py b/common_querysets/queryset_wildest_dream.py
@@ -3,7 +3,7 @@
 def generate():
 
 
-    qs_sptime_dist = (Queryset('fatalities002_pgm_conflict_sptime_dist','priogrid_month')
+    qs_sptime_dist = (Queryset('fatalities003_pgm_conflict_sptime_dist','priogrid_month')
 
                      .with_column(Column('ged_gte_1', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
                             .transform.bool.gte(1)

diff --git a/common_querysets/queryset_yellow_pikachu.py b/common_querysets/queryset_yellow_pikachu.py
@@ -2,7 +2,7 @@
 
 def generate():
 
-    qs_treelag = (Queryset('fatalities002_pgm_conflict_treelag','priogrid_month')
+    qs_treelag = (Queryset('fatalities003_pgm_conflict_treelag','priogrid_month')
 
                 .with_column(Column('ged_gte_1', from_loa='priogrid_month', from_column='ged_sb_best_sum_nokgi')
                     .transform.bool.gte(1)

diff --git a/common_utils/model_path.py b/common_utils/model_path.py
@@ -178,24 +178,24 @@ def get_model_name_from_path(path: Union[Path, str]) -> str:
             ValueError: If the model name is not found in the provided path.
         """
         path = Path(path)
-        logger.info(f"Extracting model name from Path: {path}")
+        logger.debug(f"Extracting model name from Path: {path}")
         if "models" in path.parts and "ensembles" not in path.parts:
             model_idx = path.parts.index("models")
             model_name = path.parts[model_idx + 1]
             if utils_model_naming.validate_model_name(model_name):
-                logger.info(f"Valid model name {model_name} found in path {path}")
+                logger.debug(f"Valid model name {model_name} found in path {path}")
                 return str(model_name)
             else:
-                logger.info(f"No valid model name found in path {path}")
+                logger.debug(f"No valid model name found in path {path}")
                 return None
         if "ensembles" in path.parts and "models" not in path.parts:
             model_idx = path.parts.index("ensembles")
             model_name = path.parts[model_idx + 1]
             if utils_model_naming.validate_model_name(model_name):
-                logger.info(f"Valid ensemble name {model_name} found in path {path}")
+                logger.debug(f"Valid ensemble name {model_name} found in path {path}")
                 return str(model_name)
             else:
-                logger.info(f"No valid ensemble name found in path {path}")
+                logger.debug(f"No valid ensemble name found in path {path}")
                 return None
         return None
 
@@ -319,7 +319,7 @@ def _handle_global_cache(self) -> None:
 
             cached_instance = GlobalCache[self._instance_hash]
             if cached_instance and not self._force_cache_overwrite:
-                logger.info(
+                logger.debug(
                     f"ModelPath instance {self.model_name} found in GlobalCache. Using cached instance."
                 )
                 return cached_instance
@@ -337,13 +337,13 @@ def _write_to_global_cache(self) -> None:
         from global_cache import GlobalCache
 
         if GlobalCache[self._instance_hash] is None:
-            logger.info(
+            logger.debug(
                 f"Writing {self.target.title}Path object to cache for model {self.model_name}."
             )
             GlobalCache[self._instance_hash] = self
         else:
             if self._force_cache_overwrite:
-                logger.info(
+                logger.debug(
                     f"Overwriting {self.target.title}Path object in cache for model {self.model_name}. (_force_cache_overwrite is set to True)"
                 )
                 GlobalCache[self._instance_hash] = self
@@ -481,7 +481,7 @@ def get_queryset(self) -> Optional[Dict[str, str]]:
                 logger.error(f"Error importing queryset: {e}")
                 self._queryset = None
             else:
-                logger.info(f"Queryset {self.queryset_path} imported successfully.")
+                logger.debug(f"Queryset {self.queryset_path} imported successfully.")
                 return self._queryset.generate() if self._queryset else None
         else:
             logger.warning(
@@ -569,7 +569,7 @@ def add_paths_to_sys(self) -> List[str]:
                     )
                     return
                 if model_name == self.model_name:
-                    logger.info(
+                    logger.debug(
                         f"Path {str(path)} for '{model_name}' is already added to sys.path. Skipping..."
                     )
         if self._sys_paths is None:

diff --git a/documentation/catalogs/cm_model_catalog.md b/documentation/catalogs/cm_model_catalog.md
@@ -1,3 +1,4 @@
 | Model Name | Algorithm | Target | Input Features | Non-default Hyperparameters | Forecasting Type | Implementation Status | Implementation Date | Author |
 | ---------- | --------- | ------ | -------------- | --------------------------- | ---------------- | --------------------- | ------------------- | ------ |
 | electric_relaxation | RandomForestClassifier | ged_sb_dep | - [escwa001_cflong](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_electric_relaxation.py) | - [hyperparameters electric_relaxation](https://github.com/prio-data/views_pipeline/blob/main/models/electric_relaxation/configs/config_hyperparameters.py) | None | shadow | NA | Sara |
+| brown_cheese | XGBModel | ln_ged_sb_dep | - [fatalities003_baseline](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_brown_cheese.py) | - [hyperparameters brown_cheese](https://github.com/prio-data/views_pipeline/blob/main/models/brown_cheese/configs/config_hyperparameters.py) | None | shadow | NA | Borbála |
diff --git a/documentation/catalogs/pgm_model_catalog.md b/documentation/catalogs/pgm_model_catalog.md
@@ -1,9 +1,15 @@
 | Model Name | Algorithm | Target | Input Features | Non-default Hyperparameters | Forecasting Type | Implementation Status | Implementation Date | Author |
 | ---------- | --------- | ------ | -------------- | --------------------------- | ---------------- | --------------------- | ------------------- | ------ |
-| wildest_dream | HurdleRegression | ln_ged_sb_dep | - [fatalities003_pgm_conflict_sptime_dist](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_wildest_dream.py) | - [hyperparameters wildest_dream](https://github.com/prio-data/views_pipeline/blob/main/models/wildest_dream/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
-| old_money | HurdleRegression | ln_ged_sb_dep | - [fatalities003_pgm_escwa_drought](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_old_money.py) |  | None |  | NA | Xiaolong |
+| chunky_cat | LightGBMModel | ln_ged_sb_dep | - [fatalities003_pgm_conflictlong](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_chunky_cat.py) | - [hyperparameters chunky_cat](https://github.com/prio-data/views_pipeline/blob/main/models/chunky_cat/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
+| wildest_dream | HurdleRegression | ln_ged_sb_dep | - [fatalities003_pgm_conflict_sptime_dist](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_wildest_dream.py) | - [hyperparameters wildest_dream](https://github.com/prio-data/views_pipeline/blob/main/models/wildest_dream/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
+| dark_paradise | HurdleModel | ln_ged_sb_dep | - [fatalities003_pgm_conflictlong](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_dark_paradise.py) | - [hyperparameters dark_paradise](https://github.com/prio-data/views_pipeline/blob/main/models/dark_paradise/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
+| midnight_rain | LightGBMModel | ln_ged_sb_dep | - [fatalities003_pgm_escwa_drought](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_midnight_rain.py) | - [hyperparameters midnight_rain](https://github.com/prio-data/views_pipeline/blob/main/models/midnight_rain/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
+| invisible_string | LightGBMModel | ln_ged_sb_dep | - [fatalities003_pgm_broad](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_invisible_string.py) | - [hyperparameters invisible_string](https://github.com/prio-data/views_pipeline/blob/main/models/invisible_string/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
+| old_money | HurdleModel | ln_ged_sb_dep | - [fatalities003_pgm_escwa_drought](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_old_money.py) | - [hyperparameters old_money](https://github.com/prio-data/views_pipeline/blob/main/models/old_money/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
 | purple_alien | HydraNet | ln_sb_best, ln_ns_best, ln_os_best, ln_sb_best_binarized, ln_ns_best_binarized, ln_os_best_binarized | - [escwa001_cflong](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_purple_alien.py) | - [hyperparameters purple_alien](https://github.com/prio-data/views_pipeline/blob/main/models/purple_alien/configs/config_hyperparameters.py) | None | shadow | NA | Simon |
-| orange_pasta | LightGBMModel | ged_sb_dep | - [fatalities003_pgm_baseline](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_orange_pasta.py) | - [hyperparameters orange_pasta](https://github.com/prio-data/views_pipeline/blob/main/models/orange_pasta/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
-| blank_space | HurdleRegression | ln_ged_sb_dep | - [fatalities003_pgm_natsoc](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_blank_space.py) | - [hyperparameters blank_space](https://github.com/prio-data/views_pipeline/blob/main/models/blank_space/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
-| yellow_pikachu | XGBRegressor | ged_sb_dep | - [fatalities003_pgm_conflict_treelag](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_yellow_pikachu.py) | - [hyperparameters yellow_pikachu](https://github.com/prio-data/views_pipeline/blob/main/models/yellow_pikachu/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
-| lavender_haze | HurdleRegression | ln_ged_sb_dep | - [fatalities003_pgm_broad](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_lavender_haze.py) | - [hyperparameters lavender_haze](https://github.com/prio-data/views_pipeline/blob/main/models/lavender_haze/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
+| orange_pasta | LightGBMModel | ln_ged_sb_dep | - [fatalities003_pgm_baseline](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_orange_pasta.py) | - [hyperparameters orange_pasta](https://github.com/prio-data/views_pipeline/blob/main/models/orange_pasta/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
+| blank_space | HurdleRegression | ln_ged_sb_dep | - [fatalities003_pgm_natsoc](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_blank_space.py) | - [hyperparameters blank_space](https://github.com/prio-data/views_pipeline/blob/main/models/blank_space/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
+| bad_blood | LightGBMModel | ln_ged_sb_dep | - [fatalities003_pgm_natsoc](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_bad_blood.py) | - [hyperparameters bad_blood](https://github.com/prio-data/views_pipeline/blob/main/models/bad_blood/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
+| caring_fish | XGBModel | ln_ged_sb_dep | - [fatalities003_pgm_conflict_history](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_caring_fish.py) | - [hyperparameters caring_fish](https://github.com/prio-data/views_pipeline/blob/main/models/caring_fish/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
+| yellow_pikachu | HurdleRegression | ln_ged_sb_dep | - [fatalities003_pgm_conflict_treelag](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_yellow_pikachu.py) | - [hyperparameters yellow_pikachu](https://github.com/prio-data/views_pipeline/blob/main/models/yellow_pikachu/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
+| lavender_haze | HurdleRegression | ln_ged_sb_dep | - [fatalities003_pgm_broad](https://github.com/prio-data/views_pipeline/blob/main/common_querysets/queryset_lavender_haze.py) | - [hyperparameters lavender_haze](https://github.com/prio-data/views_pipeline/blob/main/models/lavender_haze/configs/config_hyperparameters.py) | None | shadow | NA | Xiaolong |
diff --git a/ensembles/cruel_summer/README.md b/ensembles/cruel_summer/README.md
@@ -2,10 +2,6 @@
 ## Overview
 This folder contains code for the Cruel Summer model, an ensemble machine learning model designed for predicting fatalities.
 
-The model utilizes **latest** Lavender Haze (Hurdle Model LGBMClassifier+LGBMRegressor), **latest** Blank Space 
-(Hurdle Model LGBMClassifier+LGBMRegressor) and **latest** Wildest Dream (Hurdle Model XGBClassifier+XGBRegressor) 
-for its predictions and is on pgm level of analysis.
-
 The model uses log fatalities.
 
 ## Repository Structure

diff --git a/ensembles/cruel_summer/configs/config_deployment.py b/ensembles/cruel_summer/configs/config_deployment.py
@@ -1,16 +1,20 @@
-def get_deployment_config():
+"""
+Deployment Configuration Script
+
+This script defines the deployment configuration settings for the application. 
+It includes the deployment status and any additional settings specified.
 
-    """
-    Contains the configuration for deploying the model into different environments.
-    This configuration is "behavioral" so modifying it will affect the model's runtime behavior and integration into the deployment system.
+Deployment Status:
+- shadow: The deployment is shadowed and not yet active.
+- deployed: The deployment is active and in use.
+- baseline: The deployment is in a baseline state, for reference or comparison.
+- deprecated: The deployment is deprecated and no longer supported.
 
-    Returns:
-    - deployment_config (dict): A dictionary containing deployment settings, determining how the model is deployed, including status, endpoints, and resource allocation.
-    """
+Additional settings can be included in the configuration dictionary as needed.
 
-    # More deployment settings can/will be added here
-    deployment_config = {
-       "deployment_status": "shadow", # shadow, deployed, baseline, or deprecated
-    }
+"""
 
-    return deployment_config
+def get_deployment_config():
+    # Deployment settings
+    deployment_config = {'deployment_status': 'shadow'}
+    return deployment_config
diff --git a/ensembles/cruel_summer/configs/config_hyperparameters.py b/ensembles/cruel_summer/configs/config_hyperparameters.py
@@ -2,4 +2,4 @@ def get_hp_config():
     hp_config = {
         "steps": [*range(1, 36 + 1, 1)]
     }
-    return hp_config
+    return hp_config