From 1607df04eaf00bd5a7409a1013dc468c990e074b Mon Sep 17 00:00:00 2001 From: Ekaterina Kazantseva Date: Mon, 12 Aug 2024 16:17:21 +0300 Subject: [PATCH] ref --- strainy/clustering/cluster.py | 2 +- strainy/clustering/cluster_postprocess.py | 2 +- .../asm_graph_ops.py | 2 +- .../gfa_ops.py | 0 .../overlap_graph_ops.py | 45 +++++++++++-------- strainy/transform.py | 10 ++--- 6 files changed, 34 insertions(+), 27 deletions(-) rename strainy/{gfa_operations => graph_operations}/asm_graph_ops.py (98%) rename strainy/{gfa_operations => graph_operations}/gfa_ops.py (100%) rename strainy/{gfa_operations => graph_operations}/overlap_graph_ops.py (93%) diff --git a/strainy/clustering/cluster.py b/strainy/clustering/cluster.py index cbba68d..6c63879 100644 --- a/strainy/clustering/cluster.py +++ b/strainy/clustering/cluster.py @@ -12,7 +12,7 @@ from strainy.clustering import build_data as build_data from strainy.clustering import build_adj_matrix as matrix from strainy.params import * -import strainy.gfa_operations.gfa_ops as gfa_ops +import strainy.graph_operations.gfa_ops as gfa_ops logger = logging.getLogger() diff --git a/strainy/clustering/cluster_postprocess.py b/strainy/clustering/cluster_postprocess.py index fee7b94..7ddd126 100644 --- a/strainy/clustering/cluster_postprocess.py +++ b/strainy/clustering/cluster_postprocess.py @@ -4,7 +4,7 @@ from strainy.clustering.community_detection import find_communities from strainy.clustering import build_adj_matrix as matrix from strainy.clustering import build_data -from strainy.gfa_operations import gfa_ops +from strainy.graph_operations import gfa_ops from strainy.params import * logger = logging.getLogger() diff --git a/strainy/gfa_operations/asm_graph_ops.py b/strainy/graph_operations/asm_graph_ops.py similarity index 98% rename from strainy/gfa_operations/asm_graph_ops.py rename to strainy/graph_operations/asm_graph_ops.py index 2bcf28c..1af5d2e 100644 --- a/strainy/gfa_operations/asm_graph_ops.py +++ b/strainy/graph_operations/asm_graph_ops.py @@ -1,5 +1,5 @@ import logging -from strainy.gfa_operations import gfa_ops +from strainy.graph_operations import gfa_ops from strainy.unitig_statistics import utg_stats from strainy.clustering import build_data from strainy.params import * diff --git a/strainy/gfa_operations/gfa_ops.py b/strainy/graph_operations/gfa_ops.py similarity index 100% rename from strainy/gfa_operations/gfa_ops.py rename to strainy/graph_operations/gfa_ops.py diff --git a/strainy/gfa_operations/overlap_graph_ops.py b/strainy/graph_operations/overlap_graph_ops.py similarity index 93% rename from strainy/gfa_operations/overlap_graph_ops.py rename to strainy/graph_operations/overlap_graph_ops.py index 2563ee7..d33176b 100644 --- a/strainy/gfa_operations/overlap_graph_ops.py +++ b/strainy/graph_operations/overlap_graph_ops.py @@ -2,8 +2,8 @@ import pygraphviz as gv from collections import Counter, deque, defaultdict import logging -from strainy.gfa_operations import gfa_ops -from strainy.gfa_operations import asm_graph_ops +from strainy.graph_operations import gfa_ops +from strainy.graph_operations import asm_graph_ops from strainy.params import * @@ -18,7 +18,6 @@ """ - def build_paths_graph(cons, full_paths_roots, full_paths_leafs, cluster_distances): """ Create an "overlap" graph for clusters within a unitig, based on flye distance @@ -26,18 +25,19 @@ def build_paths_graph(cons, full_paths_roots, full_paths_leafs, cluster_distance M = cluster_distances G = gfa_ops.from_pandas_adjacency_notinplace(M, create_using = nx.DiGraph) G.remove_edges_from(list(nx.selfloop_edges(G))) + G = remove_nested(G, cons) try: G.remove_node(0) except: pass - #todo move it to parental function G, full_paths_roots, full_paths_leafs = \ remove_leaf_root_subnodes(G,full_paths_roots,full_paths_leafs) - G = remove_nested(G, cons) G = remove_transitive(G) return G + + def remove_transitive(G): path_remove = [] for node in G.nodes(): @@ -47,35 +47,43 @@ def remove_transitive(G): if len(n_path) == 3: path_remove.append(n_path) for n_path in path_remove: - try: - G.remove_edge(n_path[0], n_path[1]) - except: - continue + try: + G.remove_edge(n_path[0], n_path[1]) + except: + continue return G + + def remove_leaf_root_subnodes(G,full_paths_roots,full_paths_leafs): node_remove = [] - for node in full_paths_leafs+full_paths_roots: - if node in full_paths_leafs: - neighbors = list(full_paths_leafs) - else: - neighbors = list(full_paths_roots) + for node in full_paths_leafs: + neighbors = list(full_paths_leafs) for neighbor in list(neighbors): - for n_path in nx.algorithms.all_simple_paths(G, node, neighbor, cutoff = 2): + for n_path in nx.algorithms.all_simple_paths(G, node, neighbor, cutoff=2): + print(n_path) if len(n_path) == 2: node_remove.append(neighbor) + for node in full_paths_roots: + neighbors = list(full_paths_roots) + for neighbor in list(neighbors): + for n_path in nx.algorithms.all_simple_paths(G, neighbor,node, cutoff = 2): + print(n_path) + if len(n_path) == 2: + node_remove.append(neighbor) for node in node_remove: - try: + try: G.remove_node(node) logger.debug("REMOVE " + str(node)) full_paths_roots.remove(node) full_paths_leafs.remove(node) - except: - continue + except: + continue return (G,full_paths_roots,full_paths_leafs) + def remove_bubbles(graph, source_nodes): for node in source_nodes: neighbors = list(source_nodes) @@ -99,7 +107,6 @@ def find_full_paths(G, paths_roots, paths_leafs): pass for path in list(paths_nx): paths.append(path) - return paths diff --git a/strainy/transform.py b/strainy/transform.py index 5093bf1..5f1e25d 100644 --- a/strainy/transform.py +++ b/strainy/transform.py @@ -15,9 +15,9 @@ from strainy.clustering import build_adj_matrix as matrix from strainy.clustering import cluster_postprocess as postprocess from strainy.simplification import simplify_links as smpl -from strainy.gfa_operations import gfa_ops -from strainy.gfa_operations import asm_graph_ops -from strainy.gfa_operations import overlap_graph_ops +from strainy.graph_operations import gfa_ops +from strainy.graph_operations import asm_graph_ops +from strainy.graph_operations import overlap_graph_ops from strainy.unitig_statistics import utg_stats from strainy.flye_consensus import FlyeConsensus from strainy.clustering import build_data @@ -27,11 +27,11 @@ from strainy.reports.call_variants import produce_strainy_vcf from strainy.preprocessing import gfa_to_fasta from strainy.phase import color_bam - +from dataclasses import dataclass logger = logging.getLogger() -from dataclasses import dataclass +