Skip to content

Commit

Permalink
fix hop implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
bdpedigo committed Mar 12, 2024
1 parent 5c1b665 commit 2905047
Showing 1 changed file with 140 additions and 33 deletions.
173 changes: 140 additions & 33 deletions networkframe/networkframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

Index = Union[pd.Index, pd.MultiIndex, np.ndarray, list]

ConnectionType = Union[Literal["weak"], Literal["strong"]]


class NetworkFrame:
"""Represent a network as a pair of DataFrames, one for nodes and one for edges."""
Expand Down Expand Up @@ -653,37 +655,58 @@ def to_sparse_adjacency(
)
return adj

def _get_component_indices(self) -> tuple[int, np.ndarray]:
def _get_component_indices(
self, directed: bool = True, connection: ConnectionType = "weak"
) -> tuple[int, np.ndarray]:
"""Helper function for connected_components."""
from scipy.sparse.csgraph import connected_components

adjacency = self.to_sparse_adjacency()
n_components, labels = connected_components(adjacency, directed=self.directed)
n_components, labels = connected_components(
adjacency, directed=directed, connection=connection
)
return n_components, labels

def largest_connected_component(
self, inplace: bool = False, verbose: bool = False
self,
directed: bool = True,
connection: ConnectionType = "weak",
inplace: bool = False,
verbose: bool = False,
) -> Optional[Self]:
"""
Find the largest [connected component](https://en.wikipedia.org/wiki/Component_(graph_theory))
of the network.
Parameters
----------
directed
If True (default), then operate on a directed graph: only move from point
`i` to point `j` along edges from `i` to `j`. If False, then compute
components on an undirected graph: the algorithm will tread edges from
`i` to `j` and from `j` to `i` the same.
connection
['weak'|'strong']. For directed graphs, the type of connection to use.
Nodes `i` and `j` are strongly connected if a path exists both from `i` to
`j` and from `j` to `i`. A directed graph is weakly connected if replacing
all of its directed edges with undirected edges produces a connected
(undirected) graph. If directed == False, this keyword is not referenced.
inplace
Whether to modify the `NetworkFrame` to select only the largest connected
component, rather than returning a new one.
verbose
Whether to print the number of nodes removed when taking the largest connected
component.
Whether to print the number of nodes removed when taking the largest
connected component.
Returns
-------
:
A new NetworkFrame with only the largest connected component. If
`inplace=True`, returns `None`.
"""
_, labels = self._get_component_indices()
_, labels = self._get_component_indices(
directed=directed, connection=connection
)
label_counts = pd.Series(labels).value_counts()
biggest_label = label_counts.idxmax()
mask = labels == biggest_label
Expand All @@ -697,56 +720,126 @@ def largest_connected_component(

return self._return(nodes=nodes, edges=edges, inplace=inplace)

def connected_components(self) -> Iterator["NetworkFrame"]:
def connected_components(
self, directed: bool = True, connection: ConnectionType = "weak"
) -> Iterator["NetworkFrame"]:
"""
Iterate over the [connected components](https://en.wikipedia.org/wiki/Component_(graph_theory))
of the network.
Parameters
----------
directed
If True (default), then operate on a directed graph: only move from point
`i` to point `j` along edges from `i` to `j`. If False, then compute
components on an undirected graph: the algorithm will tread edges from
`i` to `j` and from `j` to `i` the same.
connection
['weak'|'strong']. For directed graphs, the type of connection to use.
Nodes `i` and `j` are strongly connected if a path exists both from `i` to
`j` and from `j` to `i`. A directed graph is weakly connected if replacing
all of its directed edges with undirected edges produces a connected
(undirected) graph. If directed == False, this keyword is not referenced.
Yields
------
:
A new NetworkFrame for each connected component.
"""
n_components, labels = self._get_component_indices()
n_components, labels = self._get_component_indices(
directed=directed, connection=connection
)
index = self.nodes.index

for i in range(n_components):
this_index = index[labels == i]
yield self.loc[this_index, this_index]

def n_connected_components(self) -> int:
def n_connected_components(
self, directed: bool = True, connection: ConnectionType = "weak"
) -> int:
"""
Return the number of [connected components](https://en.wikipedia.org/wiki/Component_(graph_theory))
of the network.
Parameters
----------
directed
If True (default), then operate on a directed graph: only move from point
`i` to point `j` along edges from `i` to `j`. If False, then compute
components on an undirected graph: the algorithm will tread edges from
`i` to `j` and from `j` to `i` the same.
connection
['weak'|'strong']. For directed graphs, the type of connection to use.
Nodes `i` and `j` are strongly connected if a path exists both from `i` to
`j` and from `j` to `i`. A directed graph is weakly connected if replacing
all of its directed edges with undirected edges produces a connected
(undirected) graph. If directed == False, this keyword is not referenced.
Returns
-------
:
The number of connected components.
"""
n_components, _ = self._get_component_indices()
n_components, _ = self._get_component_indices(
directed=directed, connection=connection
)
return n_components

def is_fully_connected(self) -> bool:
def is_fully_connected(
self, directed: bool = True, connection: ConnectionType = "weak"
) -> bool:
"""
Return whether the network is fully connected.
Parameters
----------
directed
If True (default), then operate on a directed graph: only move from point
`i` to point `j` along edges from `i` to `j`. If False, then compute
components on an undirected graph: the algorithm will tread edges from
`i` to `j` and from `j` to `i` the same.
connection
['weak'|'strong']. For directed graphs, the type of connection to use.
Nodes `i` and `j` are strongly connected if a path exists both from `i` to
`j` and from `j` to `i`. A directed graph is weakly connected if replacing
all of its directed edges with undirected edges produces a connected
(undirected) graph. If directed == False, this keyword is not referenced.
Returns
-------
:
Whether the network is fully connected.
"""
return self.n_connected_components() == 1
return (
self.n_connected_components(directed=directed, connection=connection) == 1
)

def label_nodes_by_component(
self, name: str = "component", inplace: bool = False
self,
name: str = "component",
inplace: bool = False,
directed: bool = True,
connection: ConnectionType = "weak",
) -> Optional[Self]:
"""
Add a column to `.nodes` labeling which connected component they are in.
Parameters
----------
directed
If True (default), then operate on a directed graph: only move from point
`i` to point `j` along edges from `i` to `j`. If False, then compute
components on an undirected graph: the algorithm will tread edges from
`i` to `j` and from `j` to `i` the same.
connection
['weak'|'strong']. For directed graphs, the type of connection to use.
Nodes `i` and `j` are strongly connected if a path exists both from `i` to
`j` and from `j` to `i`. A directed graph is weakly connected if replacing
all of its directed edges with undirected edges produces a connected
(undirected) graph. If directed == False, this keyword is not referenced.
name
The name of the column to add to `.nodes`.
inplace
Expand All @@ -759,7 +852,9 @@ def label_nodes_by_component(
A new NetworkFrame with the component labels added to `.nodes`. If
`inplace=True`, returns `None`.
"""
_, labels = self._get_component_indices()
_, labels = self._get_component_indices(
directed=directed, connection=connection
)

if inplace:
nodes = self.nodes
Expand All @@ -771,16 +866,34 @@ def label_nodes_by_component(

return self._return(nodes=nodes, inplace=inplace)

def component_labels(self) -> pd.Series:
def component_labels(
self, directed: bool = True, connection: str = "weak"
) -> pd.Series:
"""Return the indices of the connected components.
Parameters
----------
directed
If True (default), then operate on a directed graph: only move from point
`i` to point `j` along edges from `i` to `j`. If False, then compute
components on an undirected graph: the algorithm will tread edges from
`i` to `j` and from `j` to `i` the same.
connection
['weak'|'strong']. For directed graphs, the type of connection to use.
Nodes `i` and `j` are strongly connected if a path exists both from `i` to
`j` and from `j` to `i`. A directed graph is weakly connected if replacing
all of its directed edges with undirected edges produces a connected
(undirected) graph. If directed == False, this keyword is not referenced.
Returns
-------
:
A series of the same length as the number of nodes, where each element
corresponds to the connected component of the node at that index.
"""
_, labels = self._get_component_indices()
_, labels = self._get_component_indices(
directed=directed, connection=connection
)
labels = pd.Series(labels, index=self.nodes.index)
return labels

Expand Down Expand Up @@ -1065,7 +1178,10 @@ def node_agreement(self, other: Self) -> float:
return self.nodes.index.isin(other.nodes.index).mean()

def k_hop_neighborhood(
self, node_id: Union[int, str], k: int, directed: bool = False, method="power"
self,
node_id: Union[int, str],
k: int,
directed: bool = False,
):
"""
Return the k-hop neighborhood of a node.
Expand All @@ -1079,9 +1195,6 @@ def k_hop_neighborhood(
directed
Whether to consider the network as directed for computing the reachable
nodes.
method
The method to use to compute the k-hop neighborhood. Currently only "power" is
supported. Dijkstra's algorithm may be supported in the future.
Returns
-------
Expand All @@ -1094,19 +1207,13 @@ def k_hop_neighborhood(
return self.query_nodes("index == @node_id", local_dict=locals())

sparse_adjacency = self.to_sparse_adjacency()
if not directed:
sparse_adjacency = sparse_adjacency.maximum(sparse_adjacency.T)
sparse_adjacency = (sparse_adjacency > 0).astype(bool)
power_adjacency = sparse_adjacency
for _ in range(k - 1):
power_adjacency = power_adjacency @ sparse_adjacency
index = self.nodes.index.get_loc(node_id)
row_mask = power_adjacency[[index], :]
nonzero_indices = row_mask.nonzero()
targets = nonzero_indices[1]
select_indices = self.nodes.index[targets].to_list()
if node_id not in select_indices:
select_indices.append(node_id)
iloc = self.nodes.index.get_loc(node_id)

from scipy.sparse.csgraph import dijkstra

dists = dijkstra(sparse_adjacency, directed=directed, indices=iloc, limit=k)
mask = ~np.isinf(dists)
select_indices = self.nodes.index[mask]
select_indices
return self.query_nodes("index in @select_indices", local_dict=locals())

Expand Down

0 comments on commit 2905047

Please sign in to comment.