Add toposort to Graph and update the dagnn function in _ml.py
pablormier committed Dec 4, 2024
1 parent 9d2a429 commit 3959914
Showing 3 changed files with 43 additions and 145 deletions.
28 changes: 27 additions & 1 deletion corneto/_graph.py
@@ -1,6 +1,6 @@
import abc
import pickle
-from collections import OrderedDict
+from collections import OrderedDict, deque
from copy import deepcopy
from enum import Enum
from itertools import chain
@@ -632,6 +632,32 @@ def opener(file, mode="r"):
        with opener(filename, "rb") as f:
            return pickle.load(f)

    def toposort(self):
        """Topological sort of the vertices using Kahn's algorithm."""
        # Count unique predecessors so parallel edges are not double-counted
        in_degree = {v: len(set(self.predecessors(v))) for v in self._get_vertices()}

        # Initialize the queue with vertices having zero in-degree
        queue = deque([v for v in in_degree if in_degree[v] == 0])

        result = []

        while queue:
            v = queue.popleft()
            result.append(v)

            # Decrease the in-degree of each unique successor by 1,
            # matching the set-based count used above
            for successor in set(self.successors(v)):
                in_degree[successor] -= 1
                if in_degree[successor] == 0:
                    queue.append(successor)

        # A topological order exists only if the graph is acyclic
        if len(result) == self.num_vertices:
            return result
        else:
            raise ValueError("Graph contains a cycle, so topological sort is not possible.")


    def reachability_analysis(
        self,
        input_nodes,
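For context, a minimal sketch of the new method in use (assuming Graph is imported from corneto._graph, as exercised by the new test in tests/test_graph.py below):

from corneto._graph import Graph

g = Graph()
g.add_edge("a", "b")
g.add_edge("b", "c")
g.add_edge("a", "c")

# Each vertex appears only after all of its predecessors
print(g.toposort())  # -> ['a', 'b', 'c']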
145 changes: 1 addition & 144 deletions corneto/_ml.py
@@ -18,18 +18,6 @@ def _load_keras():
    except ImportError as e:
        raise e

def _concat_indexes(layer, indexes, keras):
    if len(indexes) > 1:
        if len(set(indexes)) == layer.shape[1]:
            subset = layer
        else:
            slices = [layer[:, j : (j + 1)] for j in indexes]
            subset = keras.layers.Concatenate()(slices)
    else:
        j = list(indexes)[0]
        subset = layer[:, j : (j + 1)]
    return subset

def toposort(G):
    # Topological sort using Kahn's algorithm
    in_degree = {v: len(set(G.predecessors(v))) for v in G._get_vertices()}
@@ -56,7 +44,6 @@ def toposort(G):
        raise ValueError("Graph contains a cycle, so topological sort is not possible.")



def index_selector():
    keras = _load_keras()

@@ -101,8 +88,6 @@ def get_config(self):

    return IndexSelector



def build_dagnn(
    G,
    input_nodes,
@@ -130,7 +115,7 @@
        )(input_layer)
    if unit_norm_input:
        input_layer = keras.layers.UnitNormalization()(input_layer)
-    vertices = toposort(G)
+    vertices = G.toposort()
    input_index = {v: i for i, v in enumerate(input_nodes)}
    kernel_reg, bias_reg = None, None
    if kernel_reg_l1 > 0 or kernel_reg_l2 > 0:
@@ -212,133 +197,5 @@
    model = keras.Model(inputs=input_layer, outputs=output_layer)
    return model
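A minimal usage sketch for the updated function (hypothetical: it assumes build_dagnn keeps the same G, input_nodes, and output_nodes parameters as the create_dagnn helper removed below, leaves the remaining arguments at their defaults, and that Keras is installed):

from corneto._graph import Graph
from corneto._ml import build_dagnn

# Toy DAG: two inputs feed a hidden node, which feeds one output
G = Graph()
G.add_edge("x1", "h")
G.add_edge("x2", "h")
G.add_edge("h", "y")

model = build_dagnn(G, input_nodes=["x1", "x2"], output_nodes=["y"])
model.summary()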


def create_dagnn(
    G: BaseGraph,
    input_nodes,
    output_nodes,
    bias_reg_l1=0,
    bias_reg_l2=0,
    kernel_reg_l1=0,
    kernel_reg_l2=0,
    batch_norm_input=True,
    batch_norm_center=False,
    batch_norm_scale=False,
    unit_norm_input=False,
    dropout=0.20,
    min_inputs_for_dropout=2,
    activation_attribute="activation",
    default_hidden_activation="sigmoid",
    default_output_activation="sigmoid",
    verbose=False,
):
    keras = _load_keras()
    input_layer = keras.Input(shape=(len(input_nodes),), name="inputs")
    if batch_norm_input:
        input_layer = keras.layers.BatchNormalization(
            center=batch_norm_center, scale=batch_norm_scale
        )(input_layer)
    if unit_norm_input:
        input_layer = keras.layers.UnitNormalization()(input_layer)
    # if nonneg_unit_norm_input:
    #     input_layer = keras.layers.Lambda(lambda x: (1 + x) / 2)(input_layer)
    input_index = {v: i for i, v in enumerate(input_nodes)}
    queue = list(input_nodes)
    neurons = {}
    concat_cache = {}
    while len(queue) > 0:
        v = queue.pop(0)
        for s in G.successors(v):
            if s not in neurons:
                queue.append(s)
            else:
                continue
            if s not in input_index:
                n_inputs = []
                s_idx_inputs = set()
                s_neu_inputs = set()
                for p in G.predecessors(s):
                    if p in input_index:
                        idx = input_index[p]
                        s_idx_inputs.add(idx)
                    else:
                        s_neu_inputs.add(p)
                # Check if all neuron inputs are created
                if len(s_neu_inputs) > 0:
                    if not all([p in neurons for p in s_neu_inputs]):
                        continue
                # Now check if there is a cached concatenation
                # for the inputs of this neuron
                if len(s_idx_inputs) > 0:
                    s_idx_inputs = frozenset(s_idx_inputs)
                    if s_idx_inputs in concat_cache:
                        n_inputs.append(concat_cache[s_idx_inputs])
                    else:
                        subset_inputs = _concat_indexes(
                            input_layer, s_idx_inputs, keras
                        )
                        concat_cache[s_idx_inputs] = subset_inputs
                        n_inputs.append(subset_inputs)
                if len(s_neu_inputs) > 0:
                    s_neu_inputs = frozenset(s_neu_inputs)
                    if s_neu_inputs in concat_cache:
                        n_inputs.append(concat_cache[s_neu_inputs])
                    else:
                        if len(s_neu_inputs) > 1:
                            subset_inputs = keras.layers.Concatenate()(
                                [neurons[p] for p in s_neu_inputs]
                            )
                            concat_cache[s_neu_inputs] = subset_inputs
                        else:
                            subset_inputs = neurons[list(s_neu_inputs)[0]]
                        n_inputs.append(subset_inputs)
                if len(n_inputs) > 1:
                    neuron_inputs = keras.layers.Concatenate(name=f"{s}_c")(n_inputs)
                else:
                    neuron_inputs = n_inputs[0]
                if dropout > 0 and len(n_inputs) >= min_inputs_for_dropout:
                    neuron_inputs = keras.layers.Dropout(dropout)(neuron_inputs)

                # Create the neuron.
                default_act = (
                    default_hidden_activation
                    if s not in output_nodes
                    else default_output_activation
                )
                act = G.get_attr_vertex(s).get(activation_attribute, default_act)
                # ElasticNet regularization
                kernel_reg, bias_reg = None, None
                if kernel_reg_l1 > 0 or kernel_reg_l2 > 0:
                    kernel_reg = keras.regularizers.l1_l2(
                        l1=kernel_reg_l1, l2=kernel_reg_l2
                    )
                if bias_reg_l1 > 0 or bias_reg_l2 > 0:
                    bias_reg = keras.regularizers.l1_l2(
                        l1=bias_reg_l1, l2=bias_reg_l2
                    )
                neuron = keras.layers.Dense(
                    1,
                    activation=act,
                    kernel_regularizer=kernel_reg,
                    bias_regularizer=bias_reg,
                    name=s,
                )
                x = neuron(neuron_inputs)
                neurons[s] = x
                if verbose:
                    print(
                        f"{s} ({act}) > {len(s_idx_inputs)} data input(s), {len(s_neu_inputs)} neuron input(s)"
                    )
    # Create the model
    if len(output_nodes) == 1:
        output_layer = neurons[output_nodes[0]]
    else:
        output_layer = keras.layers.Concatenate(name="output_layer")(
            [neurons[v] for v in output_nodes]
        )
    model = keras.Model(inputs=input_layer, outputs=output_layer)
    return model


def plot_model(model):
    return _load_keras().utils.plot_model(model)
15 changes: 15 additions & 0 deletions tests/test_graph.py
@@ -274,6 +274,21 @@ def test_graph_bfs_rev():
    assert dist[4] == 2
    assert 5 not in dist

def test_graph_toposort():
    g = Graph()
    g.add_edge("a", "b")
    g.add_edge("a", "c")
    g.add_edge("c", "b")
    g.add_edge("c", "d")
    g.add_edge("c", "e")
    g.add_edge("b", "d")
    g.add_edge("d", "e")
    order = g.toposort()
    assert order.index("a") < order.index("b")
    assert order.index("a") < order.index("c")
    assert order.index("c") < order.index("d")
    assert order.index("d") < order.index("e")
    assert order.index("b") < order.index("d")

def test_incidence_single_edge_single_source_vertex():
    g = Graph()
