From 41751dfffb761b582e8744df032553b7b0466e29 Mon Sep 17 00:00:00 2001
From: evanfeinberg
Date: Mon, 8 Aug 2016 18:16:43 -0700
Subject: [PATCH 1/2] added construct_graph routine from conformation

---
 msmexplorer/plots/msm.py | 130 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)

diff --git a/msmexplorer/plots/msm.py b/msmexplorer/plots/msm.py
index 880a361..34a3d5d 100644
--- a/msmexplorer/plots/msm.py
+++ b/msmexplorer/plots/msm.py
@@ -96,3 +96,133 @@ def plot_timescales(msm, n_timescales=None, error=None, sigma=2, colors=None,
         tick.label.set_fontsize(16)
 
     return ax
+
+def construct_graph(msm_modeler_dir, clusterer_dir, n_clusters, tica_lag_time=5, msm_lag_time=10, graph_file="~/graph_file.graphml", msm_object=None, clusterer_object=None,
+  inactive = None, active = None, pnas_clusters_averages = None,
+  tica_clusters_averages = None, docking=None, macrostate = None,
+  cluster_attributes=None, msm_attributes=None):
+ 
+    """
+    Construct a .graphml graph based on an MSM and attributes of clusters and/or MSM states.
+    Saves .graphml graph to disk and returns it as well.
+
+    *requires the networkx python package*
+
+    Parameters
+    ----------
+    msm_modeler_dir: location on disk of verboseload loadable msm object
+    clusterer_dir: location on disk of verboseload loadable clusterer object
+    n_clusters: number of clusters
+    tica_lag_time: tica lag time
+    msm_lag_time: msm lag time
+    graph_file: location on disk for saving graphml file
+    msm_object: pass msm object directly instead of loading from disk
+    clusterer_object: pass clusterer object directly instead of loading from disk
+    cluster_attributes: dictionary that maps names of attributes to lists of size n_clusters
+      where each entry in the list is the value of that attribute for that cluster. for example,
+      if n_clusters=3, an example cluster_attributes dict might be:
+      cluster_attributes = {'tyr75-his319_dist': [7.0, 6.0, 8.0], 'phe289-chi2': [90.0, 93.0, 123.2]}
+    msm_attributes: dictionary that maps names of attributes to lists of size n_msm_states
+      where each entry in the list is the value of that attribute for that msm state. for example,
+      if n_msm_states=3, an example msm_attributes dict might be:
+      msm_attributes = {'tyr75-his319_dist': [7.0, 6.0, 8.0], 'phe289-chi2': [90.0, 93.0, 123.2]}
+    """
+
+  if clusterer_object is None:
+    clusterer = verboseload(clusterer_dir)
+  else:
+    clusterer = clusterer_object
+    n_clusters = np.shape(clusterer.cluster_centers_)[0]
+
+  labels = clusterer.labels_
+
+  if not os.path.exists(msm_modeler_dir):
+    if msm_object is not None:
+      msm_modeler = msm_object
+    else:
+      msm_modeler = MarkovStateModel(lag_time=msm_lag_time, n_timescales = 5, sliding_window = True, verbose = True)
+    print(("fitting msm to trajectories with %d clusters and lag_time %d" %(n_clusters, msm_lag_time)))
+    msm_modeler.fit_transform(labels)
+    verbosedump(msm_modeler, msm_modeler_dir)
+  else:
+    msm_modeler = verboseload(msm_modeler_dir)
+  graph = nx.DiGraph()
+  mapping = msm_modeler.mapping_
+  inv_mapping = {v: k for k, v in list(mapping.items())}
+  transmat = msm_modeler.transmat_
+
+  for i in range(0, msm_modeler.n_states_):
+    for j in range(0, msm_modeler.n_states_):
+      prob = transmat[i][j]
+      if prob > 0.0:
+        if prob < 0.001: prob = 0.001
+        original_i = inv_mapping[i]
+        original_j = inv_mapping[j]
+        graph.add_edge(original_i, original_j, prob = float(prob), inverse_prob = 1.0 / float(prob))
+
+  print("Number of nodes in graph:")
+  print((graph.number_of_nodes()))
+
+  if inactive is not None:
+    scores = convert_csv_to_map_nocombine(inactive)
+    for cluster in list(scores.keys()):
+      cluster_id = int(cluster[7:len(cluster)])
+      if cluster_id in graph.nodes():
+        score = scores[cluster][0]
+        graph.node[cluster_id]["inactive_pnas"] = score
+
+  if active is not None:
+    scores = convert_csv_to_map_nocombine(active)
+    for cluster in list(scores.keys()):
+      cluster_id = int(re.search(r'\d+',cluster).group())
+      if cluster_id in graph.nodes():
+        score = scores[cluster][0]
+        graph.node[cluster_id]["active_pnas"] = score
+
+  if pnas_clusters_averages is not None:
+    scores = convert_csv_to_map_nocombine(pnas_clusters_averages)
+    for cluster in list(scores.keys()):
+      cluster_id = int(re.search(r'\d+',cluster).group())
+      if cluster_id in graph.nodes():
+        graph.node[cluster_id]["tm6_tm3_dist"] = scores[cluster][0]
+        graph.node[cluster_id]["rmsd_npxxy_active"] = scores[cluster][2]
+        graph.node[cluster_id]["rmsd_connector_active"] = scores[cluster][4]
+
+  if tica_clusters_averages is not None:
+    scores = convert_csv_to_map_nocombine(tica_clusters_averages)
+    for cluster in list(scores.keys()):
+      cluster_id = int(re.search(r'\d+',cluster).group())
+      if cluster_id in graph.nodes():
+        for i in range(0,len(scores[cluster])):
+          graph.node[cluster_id]["tIC%d" %(i+1)] = scores[cluster][i]
+
+  if docking is not None:
+    scores = convert_csv_to_map_nocombine(docking)
+    for cluster in list(scores.keys()):
+      cluster_id = int(cluster[7:len(cluster)])
+      if cluster_id in graph.nodes():
+        score = scores[cluster][0]
+        graph.node[cluster_id]["docking"] = score
+
+  if macrostate is not None:
+    macromodel = verboseload(macrostate)
+    for cluster_id in range(0, n_clusters):
+      if cluster_id in graph.nodes():
+        microstate_cluster_id = mapping[cluster_id]
+        macrostate_cluster_id = macromodel.microstate_mapping_[microstate_cluster_id]
+        #print(macrostate_cluster_id)
+        graph.node[cluster_id]["macrostate"] = int(macrostate_cluster_id)
+
+  if cluster_attributes is not None:
+    for attribute in cluster_attributes.keys():
+      for cluster_id in mapping.keys():
+        graph.node[cluster_id][attribute] = float(cluster_attributes[attribute][cluster_id])
+
+
+  if msm_attributes is not None:
+    for attribute in msm_attributes.keys():
+      for cluster_id in mapping.keys():
+        graph.node[cluster_id][attribute] = float(msm_attributes[attribute][mapping[cluster_id]])
+
+  nx.write_graphml(graph, graph_file)
+  return(graph)

From d08c82ab31cfff62a6a9332c08059d5498b2a867 Mon Sep 17 00:00:00 2001
From: evanfeinberg
Date: Mon, 8 Aug 2016 18:24:09 -0700
Subject: [PATCH 2/2] fixed spaces

---
 msmexplorer/plots/msm.py | 216 +++++++++++++++++++--------------------
 1 file changed, 108 insertions(+), 108 deletions(-)

diff --git a/msmexplorer/plots/msm.py b/msmexplorer/plots/msm.py
index 34a3d5d..32c432f 100644
--- a/msmexplorer/plots/msm.py
+++ b/msmexplorer/plots/msm.py
@@ -98,10 +98,10 @@ def plot_timescales(msm, n_timescales=None, error=None, sigma=2, colors=None,
     return ax
 
 def construct_graph(msm_modeler_dir, clusterer_dir, n_clusters, tica_lag_time=5, msm_lag_time=10, graph_file="~/graph_file.graphml", msm_object=None, clusterer_object=None,
-  inactive = None, active = None, pnas_clusters_averages = None,
-  tica_clusters_averages = None, docking=None, macrostate = None,
-  cluster_attributes=None, msm_attributes=None):
- 
+    inactive = None, active = None, pnas_clusters_averages = None,
+    tica_clusters_averages = None, docking=None, macrostate = None,
+    cluster_attributes=None, msm_attributes=None):
+
     """
     Construct a .graphml graph based on an MSM and attributes of clusters and/or MSM states.
     Saves .graphml graph to disk and returns it as well.
@@ -119,110 +119,110 @@ def construct_graph(msm_modeler_dir, clusterer_dir, n_clusters, tica_lag_time=5,
     msm_object: pass msm object directly instead of loading from disk
     clusterer_object: pass clusterer object directly instead of loading from disk
     cluster_attributes: dictionary that maps names of attributes to lists of size n_clusters
-      where each entry in the list is the value of that attribute for that cluster. for example,
-      if n_clusters=3, an example cluster_attributes dict might be:
-      cluster_attributes = {'tyr75-his319_dist': [7.0, 6.0, 8.0], 'phe289-chi2': [90.0, 93.0, 123.2]}
+        where each entry in the list is the value of that attribute for that cluster. for example,
+        if n_clusters=3, an example cluster_attributes dict might be:
+        cluster_attributes = {'tyr75-his319_dist': [7.0, 6.0, 8.0], 'phe289-chi2': [90.0, 93.0, 123.2]}
     msm_attributes: dictionary that maps names of attributes to lists of size n_msm_states
-      where each entry in the list is the value of that attribute for that msm state. for example,
-      if n_msm_states=3, an example msm_attributes dict might be:
-      msm_attributes = {'tyr75-his319_dist': [7.0, 6.0, 8.0], 'phe289-chi2': [90.0, 93.0, 123.2]}
+        where each entry in the list is the value of that attribute for that msm state. for example,
+        if n_msm_states=3, an example msm_attributes dict might be:
+        msm_attributes = {'tyr75-his319_dist': [7.0, 6.0, 8.0], 'phe289-chi2': [90.0, 93.0, 123.2]}
     """
 
-  if clusterer_object is None:
-    clusterer = verboseload(clusterer_dir)
-  else:
-    clusterer = clusterer_object
-    n_clusters = np.shape(clusterer.cluster_centers_)[0]
-
-  labels = clusterer.labels_
-
-  if not os.path.exists(msm_modeler_dir):
-    if msm_object is not None:
-      msm_modeler = msm_object
-    else:
-      msm_modeler = MarkovStateModel(lag_time=msm_lag_time, n_timescales = 5, sliding_window = True, verbose = True)
-    print(("fitting msm to trajectories with %d clusters and lag_time %d" %(n_clusters, msm_lag_time)))
-    msm_modeler.fit_transform(labels)
-    verbosedump(msm_modeler, msm_modeler_dir)
-  else:
-    msm_modeler = verboseload(msm_modeler_dir)
-  graph = nx.DiGraph()
-  mapping = msm_modeler.mapping_
-  inv_mapping = {v: k for k, v in list(mapping.items())}
-  transmat = msm_modeler.transmat_
-
-  for i in range(0, msm_modeler.n_states_):
-    for j in range(0, msm_modeler.n_states_):
-      prob = transmat[i][j]
-      if prob > 0.0:
-        if prob < 0.001: prob = 0.001
-        original_i = inv_mapping[i]
-        original_j = inv_mapping[j]
-        graph.add_edge(original_i, original_j, prob = float(prob), inverse_prob = 1.0 / float(prob))
-
-  print("Number of nodes in graph:")
-  print((graph.number_of_nodes()))
-
-  if inactive is not None:
-    scores = convert_csv_to_map_nocombine(inactive)
-    for cluster in list(scores.keys()):
-      cluster_id = int(cluster[7:len(cluster)])
-      if cluster_id in graph.nodes():
-        score = scores[cluster][0]
-        graph.node[cluster_id]["inactive_pnas"] = score
-
-  if active is not None:
-    scores = convert_csv_to_map_nocombine(active)
-    for cluster in list(scores.keys()):
-      cluster_id = int(re.search(r'\d+',cluster).group())
-      if cluster_id in graph.nodes():
-        score = scores[cluster][0]
-        graph.node[cluster_id]["active_pnas"] = score
-
-  if pnas_clusters_averages is not None:
-    scores = convert_csv_to_map_nocombine(pnas_clusters_averages)
-    for cluster in list(scores.keys()):
-      cluster_id = int(re.search(r'\d+',cluster).group())
-      if cluster_id in graph.nodes():
-        graph.node[cluster_id]["tm6_tm3_dist"] = scores[cluster][0]
-        graph.node[cluster_id]["rmsd_npxxy_active"] = scores[cluster][2]
-        graph.node[cluster_id]["rmsd_connector_active"] = scores[cluster][4]
-
-  if tica_clusters_averages is not None:
-    scores = convert_csv_to_map_nocombine(tica_clusters_averages)
-    for cluster in list(scores.keys()):
-      cluster_id = int(re.search(r'\d+',cluster).group())
-      if cluster_id in graph.nodes():
-        for i in range(0,len(scores[cluster])):
-          graph.node[cluster_id]["tIC%d" %(i+1)] = scores[cluster][i]
-
-  if docking is not None:
-    scores = convert_csv_to_map_nocombine(docking)
-    for cluster in list(scores.keys()):
-      cluster_id = int(cluster[7:len(cluster)])
-      if cluster_id in graph.nodes():
-        score = scores[cluster][0]
-        graph.node[cluster_id]["docking"] = score
-
-  if macrostate is not None:
-    macromodel = verboseload(macrostate)
-    for cluster_id in range(0, n_clusters):
-      if cluster_id in graph.nodes():
-        microstate_cluster_id = mapping[cluster_id]
-        macrostate_cluster_id = macromodel.microstate_mapping_[microstate_cluster_id]
-        #print(macrostate_cluster_id)
-        graph.node[cluster_id]["macrostate"] = int(macrostate_cluster_id)
-
-  if cluster_attributes is not None:
-    for attribute in cluster_attributes.keys():
-      for cluster_id in mapping.keys():
-        graph.node[cluster_id][attribute] = float(cluster_attributes[attribute][cluster_id])
-
-
-  if msm_attributes is not None:
-    for attribute in msm_attributes.keys():
-      for cluster_id in mapping.keys():
-        graph.node[cluster_id][attribute] = float(msm_attributes[attribute][mapping[cluster_id]])
-
-  nx.write_graphml(graph, graph_file)
-  return(graph)
+    if clusterer_object is None:
+        clusterer = verboseload(clusterer_dir)
+    else:
+        clusterer = clusterer_object
+        n_clusters = np.shape(clusterer.cluster_centers_)[0]
+
+    labels = clusterer.labels_
+
+    if not os.path.exists(msm_modeler_dir):
+        if msm_object is not None:
+            msm_modeler = msm_object
+        else:
+            msm_modeler = MarkovStateModel(lag_time=msm_lag_time, n_timescales = 5, sliding_window = True, verbose = True)
+        print(("fitting msm to trajectories with %d clusters and lag_time %d" %(n_clusters, msm_lag_time)))
+        msm_modeler.fit_transform(labels)
+        verbosedump(msm_modeler, msm_modeler_dir)
+    else:
+        msm_modeler = verboseload(msm_modeler_dir)
+    graph = nx.DiGraph()
+    mapping = msm_modeler.mapping_
+    inv_mapping = {v: k for k, v in list(mapping.items())}
+    transmat = msm_modeler.transmat_
+
+    for i in range(0, msm_modeler.n_states_):
+        for j in range(0, msm_modeler.n_states_):
+            prob = transmat[i][j]
+            if prob > 0.0:
+                if prob < 0.001: prob = 0.001
+                original_i = inv_mapping[i]
+                original_j = inv_mapping[j]
+                graph.add_edge(original_i, original_j, prob = float(prob), inverse_prob = 1.0 / float(prob))
+
+    print("Number of nodes in graph:")
+    print((graph.number_of_nodes()))
+
+    if inactive is not None:
+        scores = convert_csv_to_map_nocombine(inactive)
+        for cluster in list(scores.keys()):
+            cluster_id = int(cluster[7:len(cluster)])
+            if cluster_id in graph.nodes():
+                score = scores[cluster][0]
+                graph.node[cluster_id]["inactive_pnas"] = score
+
+    if active is not None:
+        scores = convert_csv_to_map_nocombine(active)
+        for cluster in list(scores.keys()):
+            cluster_id = int(re.search(r'\d+',cluster).group())
+            if cluster_id in graph.nodes():
+                score = scores[cluster][0]
+                graph.node[cluster_id]["active_pnas"] = score
+
+    if pnas_clusters_averages is not None:
+        scores = convert_csv_to_map_nocombine(pnas_clusters_averages)
+        for cluster in list(scores.keys()):
+            cluster_id = int(re.search(r'\d+',cluster).group())
+            if cluster_id in graph.nodes():
+                graph.node[cluster_id]["tm6_tm3_dist"] = scores[cluster][0]
+                graph.node[cluster_id]["rmsd_npxxy_active"] = scores[cluster][2]
+                graph.node[cluster_id]["rmsd_connector_active"] = scores[cluster][4]
+
+    if tica_clusters_averages is not None:
+        scores = convert_csv_to_map_nocombine(tica_clusters_averages)
+        for cluster in list(scores.keys()):
+            cluster_id = int(re.search(r'\d+',cluster).group())
+            if cluster_id in graph.nodes():
+                for i in range(0,len(scores[cluster])):
+                    graph.node[cluster_id]["tIC%d" %(i+1)] = scores[cluster][i]
+
+    if docking is not None:
+        scores = convert_csv_to_map_nocombine(docking)
+        for cluster in list(scores.keys()):
+            cluster_id = int(cluster[7:len(cluster)])
+            if cluster_id in graph.nodes():
+                score = scores[cluster][0]
+                graph.node[cluster_id]["docking"] = score
+
+    if macrostate is not None:
+        macromodel = verboseload(macrostate)
+        for cluster_id in range(0, n_clusters):
+            if cluster_id in graph.nodes():
+                microstate_cluster_id = mapping[cluster_id]
+                macrostate_cluster_id = macromodel.microstate_mapping_[microstate_cluster_id]
+                #print(macrostate_cluster_id)
+                graph.node[cluster_id]["macrostate"] = int(macrostate_cluster_id)
+
+    if cluster_attributes is not None:
+        for attribute in cluster_attributes.keys():
+            for cluster_id in mapping.keys():
+                graph.node[cluster_id][attribute] = float(cluster_attributes[attribute][cluster_id])
+
+
+    if msm_attributes is not None:
+        for attribute in msm_attributes.keys():
+            for cluster_id in mapping.keys():
+                graph.node[cluster_id][attribute] = float(msm_attributes[attribute][mapping[cluster_id]])
+
+    nx.write_graphml(graph, graph_file)
+    return(graph)
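
A minimal usage sketch for the new construct_graph routine (not part of the patch series above). It is illustrative only: the random tICA-like data, file names, and attribute values are hypothetical, and it assumes construct_graph is importable from msmexplorer.plots.msm, that msmbuilder's MiniBatchKMeans and verbosedump are installed, and that the module-level names the function relies on (networkx as nx, numpy as np, os, re, verboseload, verbosedump, MarkovStateModel, convert_csv_to_map_nocombine) are defined in that module. An older NetworkX release is also assumed, since the patch uses the graph.node attribute API.

    import numpy as np
    import networkx as nx
    from msmbuilder.cluster import MiniBatchKMeans
    from msmbuilder.utils import verbosedump
    from msmexplorer.plots.msm import construct_graph

    # Hypothetical input: 10 "trajectories" of 2-D tICA coordinates.
    tica_trajs = [np.random.randn(500, 2) for _ in range(10)]

    # Cluster the projected trajectories and save the clusterer to disk.
    n_clusters = 25
    clusterer = MiniBatchKMeans(n_clusters=n_clusters).fit(tica_trajs)
    verbosedump(clusterer, "clusterer.pkl")

    # One made-up scalar attribute per cluster, keyed by attribute name.
    cluster_attributes = {
        "tyr75-his319_dist": np.random.uniform(5.0, 9.0, n_clusters).tolist(),
    }

    # "msm.pkl" does not exist yet, so construct_graph fits a MarkovStateModel
    # to clusterer.labels_, dumps it there, and then writes the .graphml file.
    graph = construct_graph("msm.pkl", "clusterer.pkl", n_clusters,
                            msm_lag_time=10,
                            graph_file="graph_file.graphml",
                            clusterer_object=clusterer,
                            cluster_attributes=cluster_attributes)

    # Edges carry prob and inverse_prob (1/p); treating inverse_prob as an
    # edge length makes high-probability routes come out as short paths.
    nodes = list(graph.nodes())
    path = nx.shortest_path(graph, source=nodes[0], target=nodes[-1],
                            weight="inverse_prob")

Because the graph is also written to graph_file, the node and edge attributes can later be reloaded with nx.read_graphml or inspected in external tools such as Gephi or Cytoscape.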