-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #71 from prio-data/create_cm_catalog_01
Create cm catalog 01
- Loading branch information
Showing
2 changed files
with
188 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
| Model Name | Algorithm | Target | Input Features | Non-default Hyperparameters | Forecasting Type | Implementation Status | Implementation Date | Author | | ||
| ---------- | --------- | ------ | -------------- | --------------------------- | ---------------- | --------------------- | ------------------- | ------ | | ||
| fatalities002_baseline_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_baseline](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L24) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_conflicthistory_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_conflict_history](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3087) | n_estimators=250, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_conflicthistory_gbm | GradientBoostingRegressor | ln_ged_sb_dep | - [fatalities002_conflict_history](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3087) | n_estimators=200 | Direct multi-step | no | NA | NA | | ||
| fatalities002_conflicthistory_hurdle_lgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_conflict_history](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3087) | clf_name="LGBMClassifier", reg_name="LGBMRegressor" | Direct multi-step | no | NA | NA | | ||
| fatalities002_conflicthistory_long_xgb | XGBRegressor | ln_ged_sb_dep | - [fatalities002_conflict_history_long](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3101) | n_estimators=100, learning_rate=0.05, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_vdem_hurdle_xgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_vdem_short](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1213) | clf_name="XGBClassifier", reg_name="XGBRegressor" | Direct multi-step | no | NA | NA | | ||
| fatalities002_wdi_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_wdi_short](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1635) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_topics_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_topics](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L82) | n_estimators=250, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_topics_xgb | XGBRegressor | ln_ged_sb_dep | - [fatalities002_topics](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L82) | n_estimators=80, learning_rate=0.05, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_topics_hurdle_lgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_topics](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L82) | clf_name="LGBMClassifier", reg_name="LGBMRegressor" | Direct multi-step | no | NA | NA | | ||
| fatalities002_joint_broad_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_joint_broad](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L2098) | n_estimators=250, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_joint_broad_hurdle_rf | HurdleRegression | ln_ged_sb_dep | - [fatalities002_joint_broad](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L2098) | clf_name="RFClassifier", reg_name="RFRegressor" | Direct multi-step | no | NA | NA | | ||
| fatalities002_joint_narrow_xgb | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | n_estimators=250, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_joint_narrow_hurdle_xgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | clf_name="XGBClassifier", reg_name="XGBRegressor" | Direct multi-step | no | NA | NA | | ||
| fatalities002_joint_narrow_hurdle_lgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | clf_name="LGBMClassifier", reg_name="LGBMRegressor" | Direct multi-step | no | NA | NA | | ||
| fatalities002_all_pca3_xgb | XGBRegressor | ln_ged_sb_dep | - [fatalities002_all_features](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3199) | n_estimators=100, learning_rate=0.05, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_aquastat_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_aquastat](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L647) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_faostat_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_faostat](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L2705) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_faoprices_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_faoprices](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L2955) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_imfweo_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_imfweo](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3021) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA | | ||
| fatalities002_Markov_glm | rf | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_Markov_rf | glm | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_baseline_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_baseline](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L34) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_conflictlong_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflictlong](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L110) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_conflictlong_hurdle_lgbm | hur_lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflictlong](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L110) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_escwa_drought_hurdle_lgbm | hur_lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_escwa_drought](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L283) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_escwa_drought_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_escwa_drought](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L283) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_natsoc_hurdle_lgbm | hur_lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_natsoc](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L451) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_natsoc_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_natsoc](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L451) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_broad_hurdle_lgbm | hur_lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_broad](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L614) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_broad_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_broad](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L614) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_conflict_history_xgb | xgb_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflict_history](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L770) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_conflict_treelag_hurdle | hur_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflict_treelag](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L1018) | None | Direct multi-step | no | NA | NA | | ||
| fatalities002_pgm_conflict_sptime_dist_hurdle | hur_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflict_sptime_dist](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L1061) | None | Direct multi-step | no | NA | NA | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
import re | ||
import ast | ||
|
||
# Path to the model definition file in the cloned viewsforecasting repo.
# extract_models() reads this file and parses every "model = {...}" literal.
model_def_path = '../viewsforecasting/SystemUpdates/ModelDefinitions.py'

# Paths to the cm and pgm queryset files in the cloned viewsforecasting repo.
# The part of each path after 'viewsforecasting/' is appended to GITHUB_URL
# when links are built, so these paths must contain that directory name.
cm_querysets_path = '../viewsforecasting/Tools/cm_querysets.py'
pgm_querysets_path = '../viewsforecasting/Tools/pgm_querysets.py'

# Base URL (repository + branch) used as the prefix of every queryset link.
# TODO: github_workflows should be changed to main when merged
GITHUB_URL = 'https://github.com/prio-data/viewsforecasting/blob/github_workflows/'
|
||
|
||
|
||
def convert_to_dict(input_str):
    """
    Convert the source text of one model definition into a dict.

    The text is a Python dict literal taken from ModelDefinitions.py. Its
    'algorithm' entry is usually an expression (e.g. a constructor call)
    rather than a literal, so it is rewritten into a string literal before
    the whole text is parsed with ast.literal_eval.

    Only the 'algorithm' value is rewritten; all other entries are parsed
    exactly as written, so descriptions containing apostrophes or embedded
    quotes survive intact.

    Parameters:
        input_str: text of the dict literal, from '{' to '}'. The
            'algorithm' entry must end with a comma followed by a newline
            to be recognised.

    Returns:
        The parsed dict, with 'algorithm' as a string in which single
        quotes are normalised to double quotes, or None (after printing
        the error) when the text cannot be parsed.
    """
    # 'algorithm' key in either quote style; the value runs up to the comma
    # that ends the line, so commas inside a call's argument list are kept.
    alg_pattern = r'''(["'])algorithm\1\s*:\s*(.*?),\s*(?=\n)'''

    def _quote_algorithm(match):
        # Turn the raw algorithm expression into a string literal.
        value = match.group(2)
        is_string_literal = (
            len(value) >= 2 and value[0] in '"\'' and value[-1] == value[0]
        )
        if not is_string_literal:
            # Normalise inner quotes to double quotes, then let repr() add
            # the outer quoting and any escaping that is needed.
            value = repr(value.replace("'", '"'))
        return f'"algorithm": {value},'

    dict_str = re.sub(alg_pattern, _quote_algorithm, input_str)

    # ast.literal_eval only accepts literals; these are the errors it is
    # documented to raise for malformed or non-literal input.
    try:
        return ast.literal_eval(dict_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        print(f"Error converting string to dict: {e}")
        return None
|
||
|
||
|
||
def extract_models(model_def_path):
    """
    Build a list of dicts, one per model defined in ModelDefinitions.py.

    Every occurrence of 'model = {' in the file starts a definition; the
    definition is taken to end at the first '}' that follows. Definitions
    that have no closing brace, or that convert_to_dict cannot parse, are
    skipped so that callers only ever receive dicts.

    Parameters:
        model_def_path: path to the model definition file.

    Returns:
        A list of model dicts, in file order.
    """
    with open(model_def_path, 'r') as file:
        content = file.read()

    models_dict = []
    for model_str in re.finditer(r'model = \{', content):
        # Start on the opening brace itself (last character of the match).
        start_index = model_str.end(0) - 1
        closing_index = content.find("}", start_index)
        if closing_index == -1:
            # Unterminated definition: there is nothing sensible to parse.
            continue
        model_dict = convert_to_dict(content[start_index:closing_index + 1])
        # convert_to_dict returns None for text it cannot parse.
        if model_dict is not None:
            models_dict.append(model_dict)
    return models_dict
|
||
|
||
def create_link(marker, line, queryfilepath):
    """
    Return a markdown bullet linking to the line where a queryset starts.

    The link text is `marker`. The target is GITHUB_URL followed by the
    part of `queryfilepath` after 'viewsforecasting/' and a '#L<line>'
    anchor.
    """
    repo_relative_path = queryfilepath.split('viewsforecasting/')[1]
    return f'- [{marker}]({GITHUB_URL}{repo_relative_path}#L{line})'
|
||
|
||
|
||
def find_querysets(queryfilepath, model):
    """
    Locate the definition of a model's queryset in a querysets file.

    The file is scanned line by line. The first line that either contains
    Queryset("<name>" with exactly the model's queryset name, or contains
    the known 'Queryset.from_merger' assignment for one of the merged
    querysets, is turned into a markdown link by create_link.

    Parameters:
        queryfilepath: path to cm_querysets.py or pgm_querysets.py.
        model: model dict; its 'queryset' entry is the name searched for.

    Returns:
        A markdown link to the matching line, or the bare queryset name
        when no line matches (including when the file is empty).
    """
    queryset = model['queryset']

    # Require the closing quote so a name cannot match a longer name that
    # merely starts with it (e.g. "..._topics" vs "..._topics_extra").
    direct_pattern = re.compile(
        r'Queryset\((["\'])' + re.escape(queryset) + r'\1'
    )

    # Querysets built with Queryset.from_merger have no Queryset("<name>"
    # line; they are recognised by the assignment that creates them.
    merger_assignments = {
        'fatalities002_all_features': 'qs_all_features = Queryset.from_merger',
        'fatalities002_conflict_history': 'qs_conflict = Queryset.from_merger',
        'fatalities002_conflict_history_long': 'qs_conflict_long = Queryset.from_merger',
    }
    merger_assignment = merger_assignments.get(queryset)

    with open(queryfilepath, 'r') as f:
        for i, line in enumerate(f, start=1):
            is_merger_line = (
                merger_assignment is not None and merger_assignment in line
            )
            if direct_pattern.search(line) or is_merger_line:
                # Stop at the first match.
                return create_link(queryset, i, queryfilepath)

    return queryset
|
||
|
||
|
||
|
||
|
||
|
||
|
||
def generate_markdown_table(models):
    """
    Generate the markdown catalog table from the model dictionaries.

    Each model contributes one row: its name, the algorithm name (the text
    before the first '('), the dependent variable, the queryset link from
    find_querysets, the algorithm's argument text (everything between the
    first '(' and the last ')', or 'None' when there are no parentheses),
    and fixed placeholder values for the remaining columns.

    Parameters:
        models: iterable of model dicts. Falsy entries (e.g. None left by a
            failed parse) are skipped. A model without a 'queryset' entry
            gets an empty 'Input Features' cell.

    Returns:
        The table as a single string; every line ends with a newline.
    """
    headers = ['Model Name', 'Algorithm', 'Target', 'Input Features', 'Non-default Hyperparameters', 'Forecasting Type', 'Implementation Status', 'Implementation Date', 'Author']

    lines = [
        '| ' + ' '.join(f"{header} |" for header in headers),
        '| ' + ' '.join('-' * len(header) + ' |' for header in headers),
    ]

    for model in models:
        if not model:
            continue

        queryset = model.get('queryset', '')
        if not queryset:
            querysetname = ''
        elif 'pgm' in queryset:
            querysetname = find_querysets(pgm_querysets_path, model)
        else:
            querysetname = find_querysets(cm_querysets_path, model)

        # Coerce once so the name and the hyperparameters are derived from
        # the same text even if 'algorithm' is not a string.
        algorithm = str(model.get('algorithm', ''))
        # Greedy match: keep nested parentheses inside the argument list.
        hyperparameters = re.search(r'\((.*)\)', algorithm)

        row = [
            model.get('modelname', ''),
            algorithm.split('(')[0],
            model.get('depvar', ''),
            querysetname,
            hyperparameters.group(1) if hyperparameters else 'None',
            'Direct multi-step',
            'no',
            'NA',
            'NA'
        ]
        lines.append('| ' + ' | '.join(row) + ' |')

    return '\n'.join(lines) + '\n'
|
||
# Script entry: parse every model definition, render the catalog table and
# write it out. This runs at import time as well as when executed directly.
models_dict = extract_models(model_def_path)
markdown_table = generate_markdown_table(models_dict)

# The output path is relative to the current working directory, and the
# file is overwritten on every run.
with open('documentation/catalogs/cm_model_catalog.md', 'w') as f:
    f.write(markdown_table)
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|