Skip to content

Commit

Permalink
Merge pull request #71 from prio-data/create_cm_catalog_01
Browse files Browse the repository at this point in the history
Create cm catalog 01
  • Loading branch information
Polichinel authored Sep 25, 2024
2 parents 7165a1f + 389bac9 commit 4bbf1b3
Show file tree
Hide file tree
Showing 2 changed files with 188 additions and 0 deletions.
36 changes: 36 additions & 0 deletions documentation/catalogs/cm_model_catalog.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
| Model Name | Algorithm | Target | Input Features | Non-default Hyperparameters | Forecasting Type | Implementation Status | Implementation Date | Author |
| ---------- | --------- | ------ | -------------- | --------------------------- | ---------------- | --------------------- | ------------------- | ------ |
| fatalities002_baseline_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_baseline](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L24) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_conflicthistory_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_conflict_history](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3087) | n_estimators=250, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_conflicthistory_gbm | GradientBoostingRegressor | ln_ged_sb_dep | - [fatalities002_conflict_history](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3087) | n_estimators=200 | Direct multi-step | no | NA | NA |
| fatalities002_conflicthistory_hurdle_lgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_conflict_history](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3087) | clf_name="LGBMClassifier", reg_name="LGBMRegressor" | Direct multi-step | no | NA | NA |
| fatalities002_conflicthistory_long_xgb | XGBRegressor | ln_ged_sb_dep | - [fatalities002_conflict_history_long](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3101) | n_estimators=100, learning_rate=0.05, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_vdem_hurdle_xgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_vdem_short](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1213) | clf_name="XGBClassifier", reg_name="XGBRegressor" | Direct multi-step | no | NA | NA |
| fatalities002_wdi_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_wdi_short](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1635) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_topics_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_topics](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L82) | n_estimators=250, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_topics_xgb | XGBRegressor | ln_ged_sb_dep | - [fatalities002_topics](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L82) | n_estimators=80, learning_rate=0.05, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_topics_hurdle_lgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_topics](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L82) | clf_name="LGBMClassifier", reg_name="LGBMRegressor" | Direct multi-step | no | NA | NA |
| fatalities002_joint_broad_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_joint_broad](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L2098) | n_estimators=250, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_joint_broad_hurdle_rf | HurdleRegression | ln_ged_sb_dep | - [fatalities002_joint_broad](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L2098) | clf_name="RFClassifier", reg_name="RFRegressor" | Direct multi-step | no | NA | NA |
| fatalities002_joint_narrow_xgb | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | n_estimators=250, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_joint_narrow_hurdle_xgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | clf_name="XGBClassifier", reg_name="XGBRegressor" | Direct multi-step | no | NA | NA |
| fatalities002_joint_narrow_hurdle_lgb | HurdleRegression | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | clf_name="LGBMClassifier", reg_name="LGBMRegressor" | Direct multi-step | no | NA | NA |
| fatalities002_all_pca3_xgb | XGBRegressor | ln_ged_sb_dep | - [fatalities002_all_features](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3199) | n_estimators=100, learning_rate=0.05, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_aquastat_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_aquastat](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L647) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_faostat_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_faostat](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L2705) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_faoprices_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_faoprices](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L2955) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_imfweo_rf | XGBRFRegressor | ln_ged_sb_dep | - [fatalities002_imfweo](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L3021) | n_estimators=300, n_jobs=nj | Direct multi-step | no | NA | NA |
| fatalities002_Markov_glm | rf | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | None | Direct multi-step | no | NA | NA |
| fatalities002_Markov_rf | glm | ln_ged_sb_dep | - [fatalities002_joint_narrow](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/cm_querysets.py#L1861) | None | Direct multi-step | no | NA | NA |
| fatalities002_pgm_baseline_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_baseline](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L34) | None | Direct multi-step | no | NA | NA |
| fatalities002_pgm_conflictlong_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflictlong](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L110) | None | Direct multi-step | no | NA | NA |
| fatalities002_pgm_conflictlong_hurdle_lgbm | hur_lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflictlong](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L110) | None | Direct multi-step | no | NA | NA |
| fatalities002_pgm_escwa_drought_hurdle_lgbm | hur_lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_escwa_drought](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L283) | None | Direct multi-step | no | NA | NA |
| fatalities002_pgm_escwa_drought_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_escwa_drought](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L283) | None | Direct multi-step | no | NA | NA |
| fatalities002_pgm_natsoc_hurdle_lgbm | hur_lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_natsoc](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L451) | None | Direct multi-step | no | NA | NA |
| fatalities002_pgm_natsoc_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_natsoc](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L451) | None | Direct multi-step | no | NA | NA |
| fatalities002_pgm_broad_hurdle_lgbm | hur_lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_broad](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L614) | None | Direct multi-step | no | NA | NA |
| fatalities002_pgm_broad_lgbm | lgbm_regressor | ln_ged_sb_dep | - [fatalities002_pgm_broad](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L614) | None | Direct multi-step | no | NA | NA |
| fatalities002_pgm_conflict_history_xgb | xgb_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflict_history](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L770) | None | Direct multi-step | no | NA | NA |
| fatalities002_pgm_conflict_treelag_hurdle | hur_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflict_treelag](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L1018) | None | Direct multi-step | no | NA | NA |
| fatalities002_pgm_conflict_sptime_dist_hurdle | hur_regressor | ln_ged_sb_dep | - [fatalities002_pgm_conflict_sptime_dist](https://github.com/prio-data/viewsforecasting/blob/github_workflows/Tools/pgm_querysets.py#L1061) | None | Direct multi-step | no | NA | NA |
152 changes: 152 additions & 0 deletions documentation/catalogs/generate_links_to_querysets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import re
import ast

# Path to the model definition file in the cloned viewsforecasting repo.
# The path is relative, so it is resolved against the current working directory.
model_def_path = '../viewsforecasting/SystemUpdates/ModelDefinitions.py'

# Paths to the cm and pgm queryset files in the cloned viewsforecasting repo.
# create_link() splits these on 'viewsforecasting/' to obtain the file path
# relative to the repo root, so that segment must be present in both paths.
cm_querysets_path = '../viewsforecasting/Tools/cm_querysets.py'
pgm_querysets_path = '../viewsforecasting/Tools/pgm_querysets.py'

# Base GitHub URL that the repo-relative file path and '#L<line>' anchor are appended to.
# TODO: github_workflows should be changed to main when merged
GITHUB_URL = 'https://github.com/prio-data/viewsforecasting/blob/github_workflows/'



def convert_to_dict(input_str):
    """
    Convert the source text of one model definition from ModelDefinitions.py to a dict.

    The text must be a dict literal. Its 'algorithm' value may be an unquoted
    expression (for example a constructor call); that expression is wrapped in
    quotes so the whole text can be parsed with ast.literal_eval, which never
    executes code.

    Args:
        input_str: Source text of a single dict literal, from "{" to "}".

    Returns:
        The parsed dict, with an unquoted 'algorithm' expression returned as a
        string, or None when the text cannot be parsed (the error is printed).
    """
    # Normalise every single quote to a double quote, so that the algorithm
    # expression can be wrapped in single quotes below without clashing.
    normalised = input_str.replace("'", "\"")

    # Regex pattern to match the 'algorithm' value up to the comma that ends
    # its line; the value may contain parentheses, commas or function calls.
    alg_pattern = r'"algorithm":\s*(.*?),\s*(?=\n)'

    def quote_algorithm(match):
        """Return the 'algorithm' entry with its value as a string literal."""
        value = match.group(1)
        # Only a value quoted at BOTH ends is already a string literal. A value
        # quoted at one end only (e.g. "prefix" + suffix) is still an
        # expression and has to be wrapped like any other.
        if value.startswith('"') and value.endswith('"'):
            return f'"algorithm": {value},'
        return f'"algorithm": \'{value}\','

    dict_str = re.sub(alg_pattern, quote_algorithm, normalised)

    # Evaluate the dictionary string using ast.literal_eval; these are the
    # exception types it is documented to raise on malformed input.
    try:
        return ast.literal_eval(dict_str)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as e:
        print(f"Error converting string to dict: {e}")
        return None



def extract_models(model_def_path):
    """
    Create a list of dictionaries containing every model from ModelDefinitions.py.

    Every occurrence of "model = {" starts a definition, which is taken to end
    at the first "}" that follows. Each definition is parsed with
    convert_to_dict.

    Args:
        model_def_path: Path to the model definition file.

    Returns:
        A list with one dict per definition that could be parsed. Definitions
        that have no closing brace or that fail to parse are skipped, so the
        list never contains None.
    """
    with open(model_def_path, 'r') as file:
        content = file.read()

    models = []
    for header in re.finditer(r'model = \{', content):
        # Step back one character so the slice starts at the opening brace.
        start_index = header.end(0) - 1
        closing_index = content.find("}", start_index)
        if closing_index == -1:
            # No closing brace: there is no complete dict literal to parse.
            print(f"Error: unterminated model definition at offset {start_index}")
            continue

        model_dict = convert_to_dict(content[start_index:closing_index + 1])
        # convert_to_dict reports failures itself and returns None; keep such
        # entries out of the result so callers can rely on getting dicts.
        if model_dict is not None:
            models.append(model_dict)
    return models


def create_link(marker, line, queryfilepath):
    """
    Build a markdown list item that links to the line where a queryset starts.

    Args:
        marker: Text shown as the link label.
        line: Line number used for the '#L<line>' anchor.
        queryfilepath: Path to the queryset file; it must contain
            'viewsforecasting/'.

    Returns:
        A string of the form '- [marker](GITHUB_URL + file + #L<line>)'.

    Raises:
        IndexError: If queryfilepath does not contain 'viewsforecasting/'.
    """
    # The segment following 'viewsforecasting/' is the path inside the repo.
    path_segments = queryfilepath.split('viewsforecasting/')
    repo_relative = path_segments[1]
    return f'- [{marker}]({GITHUB_URL}{repo_relative}#L{line})'



def find_querysets(queryfilepath, model):
    """
    Find where a model's queryset is defined in a queryset file and return a
    markdown link with the github link pointing to the right line number.

    A line matches when it contains 'Queryset("<name>' (a prefix match on the
    queryset name) or, for the three querysets listed below that are built
    with Queryset.from_merger, the assignment of the merged queryset.

    Args:
        queryfilepath: Path to cm_querysets.py or pgm_querysets.py.
        model: Model dictionary; its 'queryset' entry is the name searched for.

    Returns:
        The markdown link for the first matching line, or the plain queryset
        name when no line matches (including when the file is empty).

    Raises:
        KeyError: If model has no 'queryset' entry.
    """
    queryset = model['queryset']

    # Querysets created through Queryset.from_merger never appear as
    # 'Queryset("<name>', so they are located by their assignment instead.
    merger_assignments = {
        'fatalities002_all_features': 'qs_all_features = Queryset.from_merger',
        'fatalities002_conflict_history': 'qs_conflict = Queryset.from_merger',
        'fatalities002_conflict_history_long': 'qs_conflict_long = Queryset.from_merger',
    }
    merger_assignment = merger_assignments.get(queryset)

    # Compiled once: the pattern is the same for every line of the file.
    definition_pattern = re.compile(r'Queryset\("' + re.escape(queryset))

    with open(queryfilepath, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            is_definition = definition_pattern.search(line) is not None
            is_merger = merger_assignment is not None and merger_assignment in line
            if is_definition or is_merger:
                # Stop at the first match.
                return create_link(queryset, line_number, queryfilepath)

    # Nothing matched: fall back to the bare name so the table cell is still filled.
    return queryset







def generate_markdown_table(models):
    """
    Generate the markdown catalog table from the model dictionaries.

    Each model contributes one row. The algorithm name is the text before the
    first "(" of the 'algorithm' entry and the hyperparameters are the text
    inside the first pair of parentheses. The last four columns are fixed
    placeholders.

    Args:
        models: Iterable of model dictionaries. None entries (models that
            could not be parsed) are skipped.

    Returns:
        The table as a single string: header row, separator row and one row
        per model, each terminated by a newline.
    """
    headers = ['Model Name', 'Algorithm', 'Target', 'Input Features', 'Non-default Hyperparameters', 'Forecasting Type', 'Implementation Status', 'Implementation Date', 'Author']

    table_lines = [
        '| ' + ' '.join(f"{header} |" for header in headers),
        '| ' + ' '.join('-' * len(header) + ' |' for header in headers),
    ]

    for model in models:
        if model is None:
            continue

        queryset = model.get('queryset', '')
        if not queryset:
            # Without a queryset name there is nothing to look up or link to.
            querysetname = ''
        elif 'pgm' in queryset:
            querysetname = find_querysets(pgm_querysets_path, model)
        else:
            querysetname = find_querysets(cm_querysets_path, model)

        # Stringify once, so that the name and hyperparameter columns are
        # derived from the same text even if the entry is not a string.
        algorithm = str(model.get('algorithm', ''))
        hyperparameters = re.search(r'\((.*?)\)', algorithm)

        row = [
            str(model.get('modelname', '')),
            algorithm.split('(')[0],
            str(model.get('depvar', '')),
            str(querysetname),
            # No parentheses, or empty ones, both mean "no non-default hyperparameters".
            (hyperparameters.group(1) if hyperparameters else '') or 'None',
            'Direct multi-step',
            'no',
            'NA',
            'NA'
        ]
        table_lines.append('| ' + ' | '.join(row) + ' |')

    return '\n'.join(table_lines) + '\n'

# Script body: parse every model definition, render the catalog table and
# overwrite the markdown catalog. The output path is relative to the current
# working directory, as are the input paths defined at the top of the file.
models_dict = extract_models(model_def_path)
markdown_table = generate_markdown_table(models_dict)

# Mode 'w' replaces any previously generated catalog.
with open('documentation/catalogs/cm_model_catalog.md', 'w') as f:
    f.write(markdown_table)








0 comments on commit 4bbf1b3

Please sign in to comment.