Skip to content

Commit

Permalink
Merge pull request #1386 from virtualcell/cli-utils-python-fixes
Browse files Browse the repository at this point in the history
Fixes to vcell-cli-utils
  • Loading branch information
CodeByDrescher authored Nov 21, 2024
2 parents 157032f + a29f1f0 commit 3091923
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 127 deletions.
111 changes: 40 additions & 71 deletions vcell-cli-utils/vcell_cli_utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
# Create temp directory
tmp_dir = tempfile.mkdtemp()


def validate_omex(omex_file_path: str, temp_dir_path: str, omex_json_report_path: str) -> None:
def validate_omex(omex_file_path: str, temp_dir_path: str, omex_json_report_path: str) -> str:
if not os.path.exists(temp_dir_path):
os.mkdir(temp_dir_path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)

Expand Down Expand Up @@ -57,17 +56,18 @@ def validate_omex(omex_file_path: str, temp_dir_path: str, omex_json_report_path
config=config
)

with open(omex_json_report_path, "w") as file:
file.write(json.dumps(
{
results_dict = {
"parse_errors": reader.errors,
"parse_warnings": reader.warnings,
"validator_errors": validator_errors,
"validator_warnings": validator_warnings
}, indent=2))
}
with open(omex_json_report_path, "w") as file:
file.write(json.dumps(results_dict, indent=2))
return repr(results_dict)


def gen_sedml_2d_3d(omex_file_path, base_out_path):
def gen_sedml_2d_3d(omex_file_path, base_out_path) -> str:

temp_path = os.path.join(base_out_path, "temp")
if not os.path.exists(temp_path):
Expand Down Expand Up @@ -132,11 +132,13 @@ def gen_sedml_2d_3d(omex_file_path, base_out_path):
return temp_path


def transpose_vcml_csv(csv_file_path: str) -> str:
    """Transpose a CSV file in place and return its path.

    The file is read without a header row, its rows and columns are swapped,
    and the result is written back to the same path with neither a header
    row nor an index column.

    Args:
        csv_file_path: Path of the CSV file to rewrite.

    Returns:
        ``csv_file_path``, unchanged, so callers get a string result.
    """
    df = pd.read_csv(csv_file_path, header=None)
    # With header=None pandas labels the columns 0..n-1, so this filter is
    # expected to keep every column; it is retained to preserve behavior.
    final_cols = [col for col in df.columns if col != '']
    df[final_cols].transpose().to_csv(csv_file_path, header=False, index=False)
    print("Success!")
    return csv_file_path


def get_all_dataref_and_curves(sedml_path):
Expand Down Expand Up @@ -180,6 +182,8 @@ def get_report_label_from_data_ref(dataref: str, all_report_dataref):
return report, data_ref['data_label']




# Update plots dict

def update_dataref_with_report_label(all_report_dataref, all_plot_curves):
Expand All @@ -202,16 +206,15 @@ def get_report_dataframes(all_report_dataref, result_out_dir):
report_frames = {}
reports_list = list(set(all_report_dataref.keys()))
for report in reports_list:
report_frames[report] = pd.read_csv(os.path.join(
result_out_dir, report + ".csv")).T.reset_index()
report_frames[report] = pd.read_csv(str(os.path.join(result_out_dir, report + ".csv"))).T.reset_index()
report_frames[report].columns = report_frames[report].iloc[0].values
report_frames[report].drop(index=0, inplace=True)
return report_frames

# PLOTTING


def plot_and_save_curves(all_plot_curves, report_frames, result_out_dir):
def plot_and_save_curves(all_plot_curves, report_frames, result_out_dir) -> str:
all_plots = dict(all_plot_curves)
for plot, curve_dat in all_plots.items():
dims = (12, 8)
Expand All @@ -226,6 +229,7 @@ def plot_and_save_curves(all_plot_curves, report_frames, result_out_dir):
ax.set_ylabel('')
# plt.show()
plt.savefig(os.path.join(result_out_dir, plot + '.pdf'), dpi=300)
return os.path.join(result_out_dir, plot + '.pdf')


def gen_plot_pdfs(sedml_path, result_out_dir):
Expand All @@ -234,61 +238,11 @@ def gen_plot_pdfs(sedml_path, result_out_dir):
all_report_dataref, all_plot_curves = update_dataref_with_report_label(
all_report_dataref, all_plot_curves)
report_frames = get_report_dataframes(all_report_dataref, result_out_dir)
plot_and_save_curves(all_plot_curves, report_frames, result_out_dir)
print("Success!")
return plot_and_save_curves(all_plot_curves, report_frames, result_out_dir)


def gen_plots_for_sed2d_only_2(sedml_path, result_out_dir):
    """Render each 2D plot of a SED-ML document to ``<plot id>.pdf``.

    For every ``Plot2D`` output of the document, ``<plot id>.csv`` is read
    from ``result_out_dir``, transposed, and its curves are drawn with
    seaborn; the figure is saved next to the CSV as ``<plot id>.pdf``.

    Args:
        sedml_path: Path of the SED-ML file to read.
        result_out_dir: Directory holding the per-plot CSV files; the PDFs
            are written to the same directory.

    Raises:
        ValueError: If a curve's x or y data set is not a column of the
            data frame built from the plot's CSV.
    """
    sedml: biosimulators_utils.sedml.data_model.SedDocument \
        = biosimulators_utils.sedml.io.SedmlSimulationReader().run(sedml_path)

    for plot in [output for output in sedml.outputs if isinstance(output, Plot2D)]:
        dims = (12, 8)
        fig, ax = plt.subplots(figsize=dims)

        df = pd.read_csv(os.path.join(result_out_dir, plot.id + '.csv'), header=None).T

        # Map each id found in row 1 to its series names; an id that occurs
        # several times (repeated tasks) gets "<id>_0", "<id>_1", ...
        curve_id_mapping = {}
        for elem in df.iloc[1]:
            series = curve_id_mapping.setdefault(elem, [])
            series.append(str(elem) + "_" + str(len(series)))

        labels = []
        for key, series in curve_id_mapping.items():
            if len(series) == 1:
                series[0] = key  # If there wasn't repeated tasks, restore the old name
            labels.extend(series)

        # Format data frame: name the columns, then drop the three leading
        # rows so only data rows remain, renumbered from 0.
        df.columns = labels
        df.drop(index=[0, 1, 2], inplace=True)
        df.reset_index(drop=True, inplace=True)

        for curve in plot.curves:
            x_id = curve.x_data_generator.id
            # The x data set must be PRESENT in the frame; fail only when it
            # is missing.
            if x_id not in labels:
                raise ValueError(
                    f"Can not find x data set `{x_id}` in data frame (legal set: {labels})")
            should_label = True
            for curve_name in curve_id_mapping[curve.y_data_generator.id]:
                if curve_name not in labels:
                    raise ValueError(
                        f"Can not find y data set `{curve_name}` in data frame (legal set: {labels})")
                # Only the first series of a curve carries the legend entry.
                sns.lineplot(data=df, x=x_id, y=curve_name, ax=ax,
                             label=(curve.id if should_label else None))
                ax.set_ylabel('')
                should_label = False
        fig.savefig(os.path.join(result_out_dir, plot.id + '.pdf'), dpi=300)
        # Release the figure so a document with many plots does not
        # accumulate open figures.
        plt.close(fig)

def gen_plots_for_sed2d_only(sedml_path, result_out_dir):
all_plot_curves = {}

sedml: SedDocument = SedmlSimulationReader().run(sedml_path)

# Generate all_plot_curves
Expand All @@ -302,7 +256,8 @@ def gen_plots_for_sed2d_only(sedml_path, result_out_dir):
for curve in sed_plot_2d.curves:
all_curves[curve.id] = {
'x': curve.x_data_generator,
'y': curve.y_data_generator
'y': curve.y_data_generator,
'name': curve.name
}
all_plot_curves[sed_plot_2d.id] = all_curves

Expand All @@ -325,25 +280,39 @@ def gen_plots_for_sed2d_only(sedml_path, result_out_dir):
labels = []
for key in labelMap:
if len(labelMap[key]) == 1:
labelMap[key][0] = key # If there wasn't repreated tasks, restore the old name
labelMap[key][0] = key # If there wasn't repeated tasks, restore the old name
for elem in labelMap[key]:
labels.append(elem)

# format data frame
df.columns = labels
df.drop(0, inplace=True)
df.drop(1, inplace=True)
df.drop(2, inplace=True)
df.columns = labels
labels_df = df.copy()

df.drop(df.index[:3], inplace=True)
labels_df.drop(labels_df.index[:2], inplace=True)
labels_df.drop(labels_df.index[1:], inplace=True)

df.reset_index(inplace=True)
df.drop('index', axis=1, inplace=True)

for curve_id, data in curve_dat_dict.items(): # data <--> (dict)all_curves.values()
shouldLabel = True
for series_name in labelMap[data['y'].id]:
sns.lineplot(data=df, x=data['x'].id, y=series_name, ax=ax, label=(curve_id if shouldLabel else None))
x_axis_id = data['x'].id
y_axis_id = data['y'].id
x_data_sets = labelMap[x_axis_id]
y_data_sets = labelMap[y_axis_id]

for i in range(len(y_data_sets)):
series_name = y_data_sets[i]
x_data_set = x_data_sets[0] if len(x_data_sets) == 1 else x_data_sets[i]
label_name = data['name'] if data['name'] is not None else curve_id
sns.lineplot(data=df, x=x_data_set, y=series_name, ax=ax, label=(label_name if shouldLabel else None))
ax.set_ylabel('')
ax.set_xlabel(labels_df.at[labels_df.index[0], x_data_set])
shouldLabel = False
plt.savefig(os.path.join(result_out_dir, plot_id + '.pdf'), dpi=300)
print("Success!")
return result_out_dir


if __name__ == "__main__":
Expand Down
32 changes: 24 additions & 8 deletions vcell-cli-utils/vcell_cli_utils/status.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def extract_omex_archive(omex_file) -> list:
return sedml_files_list


def status_yml(omex_file: str, out_dir: str) -> None:
def status_yml(omex_file: str, out_dir: str) -> str:
yaml_dicts: List[Dict] = []
for sedml in extract_omex_archive(omex_file):
outputs_dict: Dict[str, List[Dict]] = {"outputs": []}
Expand Down Expand Up @@ -100,6 +100,8 @@ def status_yml(omex_file: str, out_dir: str) -> None:
sy.write(yaml.dump(final_dict))
# "return" final_dict
shutil.rmtree(tmp_dir)
print("Success!")
return "Success!"


def get_yaml_as_str(yaml_path: str) -> dict:
Expand All @@ -125,7 +127,7 @@ def dump_json_dict(json_path: str,yaml_dict: dict):
json.dump(yaml_dict,json_out,sort_keys=True,indent=4)


def update_task_status(sedml: str, task: str, status: str, out_dir: str, duration: int, algorithm: str) -> None:
def update_task_status(sedml: str, task: str, status: str, out_dir: str, duration: int, algorithm: str) -> str:
# Hardcoded because name is static
yaml_dict = get_yaml_as_str(os.path.join(out_dir, "log.yml"))
for sedml_list in yaml_dict['sedDocuments']:
Expand All @@ -146,9 +148,11 @@ def update_task_status(sedml: str, task: str, status: str, out_dir: str, duratio
status_yaml_path = os.path.join(out_dir, "log.yml")
# Convert json to yaml # Save new yaml
dump_yaml_dict(status_yaml_path, yaml_dict=yaml_dict, out_dir=out_dir)
print("Success!")
return "Success!"


def update_sedml_doc_status(sedml: str, status: str, out_dir: str) -> None:
def update_sedml_doc_status(sedml: str, status: str, out_dir: str) -> str:
# Hardcoded because name is static
yaml_dict = get_yaml_as_str(os.path.join(out_dir, "log.yml"))
for sedml_list in yaml_dict['sedDocuments']:
Expand All @@ -158,19 +162,23 @@ def update_sedml_doc_status(sedml: str, status: str, out_dir: str) -> None:

status_yaml_path = os.path.join(out_dir, "log.yml")
dump_yaml_dict(status_yaml_path, yaml_dict=yaml_dict, out_dir=out_dir)
print("Success!")
return "Success!"


def update_omex_status(status: str, out_dir: str, duration: int) -> None:
def update_omex_status(status: str, out_dir: str, duration: int) -> str:

yaml_dict = get_yaml_as_str(os.path.join(out_dir, "log.yml"))
yaml_dict['status'] = status
yaml_dict['duration'] = duration

status_yaml_path = os.path.join(out_dir, "log.yml")
dump_yaml_dict(status_yaml_path, yaml_dict=yaml_dict, out_dir=out_dir)
print("Success!")
return "Success!"


def update_dataset_status(sedml: str, report: str, dataset: str, status: str, out_dir: str) -> None:
def update_dataset_status(sedml: str, report: str, dataset: str, status: str, out_dir: str) -> str:
yaml_dict = get_yaml_as_str(os.path.join(out_dir, "log.yml"))
for sedml_list in yaml_dict['sedDocuments']:
if sedml.endswith(sedml_list["location"]):
Expand Down Expand Up @@ -206,9 +214,11 @@ def update_dataset_status(sedml: str, report: str, dataset: str, status: str, ou

# Convert json to yaml # Save new yaml
dump_yaml_dict(status_yaml_path, yaml_dict=yaml_dict, out_dir=out_dir)
print("Success!")
return "Success!"


def update_plot_status(sedml: str, plot_id: str, status: str, out_dir: str) -> None:
def update_plot_status(sedml: str, plot_id: str, status: str, out_dir: str) -> str:
yaml_dict = get_yaml_as_str(os.path.join(out_dir, "log.yml"))
for sedml_list in yaml_dict['sedDocuments']:
if sedml.endswith(sedml_list["location"]):
Expand Down Expand Up @@ -243,14 +253,16 @@ def update_plot_status(sedml: str, plot_id: str, status: str, out_dir: str) -> N

# Convert json to yaml # Save new yaml
dump_yaml_dict(status_yaml_path, yaml_dict=yaml_dict, out_dir=out_dir)
print("Success!")
return "Success!"

#
# sedmlAbsolutePath - full path to the actual SED-ML file (document) used as input
# entityId - the entity's name (not an opaque id), e.g. task_0_0 for a task, or biomodel_20754836.sedml for a SED-ML document
# out_dir - path to the directory where the log files are placed
# entityType - string describing the entity type, e.g. "task" for a task, or "sedml" for a SED-ML document
#
def set_output_message(sedmlAbsolutePath: str, entityId: str, out_dir: str, entityType: str , message: str) -> None:
def set_output_message(sedmlAbsolutePath: str, entityId: str, out_dir: str, entityType: str , message: str) -> str:

yaml_dict = get_yaml_as_str(os.path.join(out_dir, "log.yml"))
if entityType == 'omex':
Expand All @@ -274,8 +286,10 @@ def set_output_message(sedmlAbsolutePath: str, entityId: str, out_dir: str, enti
status_yaml_path = os.path.join(out_dir, "log.yml")
# Convert json to yaml # Save new yaml
dump_yaml_dict(status_yaml_path, yaml_dict=yaml_dict, out_dir=out_dir)
print("Success!")
return "Success!"

def set_exception_message(sedmlAbsolutePath: str, entityId: str, out_dir: str, entityType: str, type: str, message: str) -> None:
def set_exception_message(sedmlAbsolutePath: str, entityId: str, out_dir: str, entityType: str, type: str, message: str) -> str:

yaml_dict = get_yaml_as_str(os.path.join(out_dir, "log.yml"))
for sedml_list in yaml_dict['sedDocuments']:
Expand Down Expand Up @@ -304,6 +318,8 @@ def set_exception_message(sedmlAbsolutePath: str, entityId: str, out_dir: str, e

# Convert json to yaml # Save new yaml
dump_yaml_dict(status_yaml_path, yaml_dict=yaml_dict, out_dir=out_dir)
print("Success!")
return "Success!"


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 3091923

Please sign in to comment.