import numpy as np
import itertools
import scipy.optimize
import networkx as nx
import matplotlib.pyplot as plt
import math
from matplotlib.lines import Line2D
import time
import params
import json
import sys
import mec_size
import networkx.algorithms.approximation.vertex_cover as vertex_cover
import dream
"""
Represented by a binary matrix where m[i, j]= 1 iff i->j, else 0
PCDAG objects:
Represented by an integer matrix where m[i, j]= 1 iff i->j, -1 iff i-j (and edge no known), else 0
Weight matrices:
For linear models we store the weights as a matrix of floats. For infinite samples we don't need a weight matrix.
Noise:
Noise scales in gaussian linear models are stored as a vectors of floats
"""
def generate_chain_dag_fixed_root(n, v):
"""
generates a DAG that is just a chain with no colliders, but may not just be directions
left to right (the root vertex is random).
input:
int n: number of nodes
output:
matrix: chain dag with no unshielded colliders
"""
dag_temp = np.zeros((n,n))
for i in range(0, v):
dag_temp[i+1, i] = 1
for i in range(v, n-1):
dag_temp[i, i+1] = 1
return dag_temp
def generate_barabasi_albert(n, m1, m2=1, p=1):
    #Generates a scale-free dag using the Barabasi-Albert model:
    #each node is attached m1 times with prob p, else m2 times
    g = nx.dual_barabasi_albert_graph(n, m1, m2, p)
    dag = np.triu(nx.to_numpy_array(g), k=1)
    #randomly relabel the nodes; rows and columns must be permuted together,
    #otherwise the result need not stay acyclic
    perm = np.random.permutation(n)
    dag = dag[np.ix_(perm, perm)]
    return dag
def uniform_random_tree(n):
"""
generates tree MEC uniformly, then picks a node to be the root
https://nokyotsu.com/qscripts/2008/05/generating-random-trees-and-connected.html
input:
int n: tree size
output:
matrix: dag with tree mec
"""
cpdag = np.zeros((n, n))
src = []
dst = np.random.permutation(np.arange(n)).tolist()
src.append(dst.pop()) # picks the root
while(len(dst)>0):
a = np.random.choice(src) #random element in src
b = dst.pop()
# add edge a,b
cpdag[a][b] = -1
cpdag[b][a] = -1
src.append(b)
#now randomly pick root and orient
dag_temp = orient_tree_root_v(cpdag, np.random.choice(n))
return dag_temp
def generate_k_star_system(n, k):
"""
generates a forest of k stars with n nodes
input:
int n: tree size
int k: number of stars
output:
matrix: dag with tree mec
"""
dag = np.zeros((n,n))
r = int(math.ceil(n/k))
host = 0
nodes = np.arange(n)
np.random.shuffle(nodes)
for i in range(n):
if i%r == 0:
host = i
else:
            #orient each star edge from its hub outwards
dag[nodes[host], nodes[i]] = 1
return dag
def generate_fully_connected(n):
"""
generate a fully connected dag with a fully connected mec (no colliders)
input:
int n, number of nodes
output:
matrix dag
"""
#take matrix of all ones, removes the diagonal and below
#only generates DAGs where the ordering is the ordering of the nodes
#but this is fine since all methods are agnostic to this symmetry
dag = np.triu(np.ones(n),k=1)
return dag
def ER_bipartite(n, m, p):
"""
    constructs a bipartite graph under an ER-like model
    input:
        int n: the number of nodes in set one
        int m: the number of nodes in set two
        float p: the edge probability
    output:
        matrix: dag with all edges directed from set one to set two
    """
#strategy: generate undirected graph and remove all below diagonal
nx_dag = nx.bipartite.random_graph(n, m, p, directed=False)
return np.triu(nx.to_numpy_array(nx_dag), 1)
def generate_ER(n, p):
"""
    generates a dag using an Erdos-Renyi model:
    first generates a random ordering, then samples each edge independently and
    orients it to agree with the ordering
input:
int n, number of nodes
float p, probability of each edge appearing
output:
matrix dag
"""
ordering = np.arange(n)
np.random.shuffle(ordering)
    #ordering[i] is node i's position in the topological ordering
dag = np.zeros((n,n))
for i in range(n):
for j in range(i, n):
if i == j:
continue
if np.random.binomial(1, p) == 1:
if ordering[i] > ordering[j]:
dag[j,i] = 1
else:
dag[i,j] = 1
return dag
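#Minimal sanity check for the generators above. _check_is_dag is a hypothetical
#helper added for illustration (not part of the original module); it relies on
#the nx and np imports at the top of this file.
def _check_is_dag(adj):
    """returns True iff the adjacency matrix adj encodes an acyclic digraph"""
    g = nx.from_numpy_array(np.asarray(adj), create_using=nx.DiGraph)
    return nx.is_directed_acyclic_graph(g)
#example: _check_is_dag(generate_ER(10, 0.3)) should always return True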
def generate_chain_dag(n):
"""
    generates a chain DAG that may have colliders: each edge's direction is sampled
    uniformly at random
    input:
        int n: number of nodes
    output:
        matrix: chain dag, potentially with unshielded colliders
"""
#the nodes go in order but we just sample randomly whether the arrow goes forwards or backwards
forward_backwards = np.random.binomial(1, 0.5, n)
dag = np.zeros((n,n),dtype=np.int32)
    for i in range(0, n-1): #don't let the last node connect back to the first or we get a cycle
if forward_backwards[i] == 0:
dag[i][i+1] = 1
else:
dag[i+1][i] = 1
return dag
def extract_undirected(cpdag, v):
"""
given a cpdag and a node v, get all undirected edges out of v
input:
matrix cpdag
node v
output:
array containing all edges with which vhas an undirected edge with
"""
return np.flatnonzero(np.minimum(cpdag[v], 0))
def extract_all_directed(cpdag):
"""
given a cpdag , extract all edges that are directed
input:
matrix cpdag
output:
list of tuples of form (u, v) where u->v
"""
n = cpdag.shape[0]
edges = []
for v in range(0, n):
for u in range(0, n):
if cpdag[v][u] == 1:
edges.append((v, u))
return edges
def chordal_random_intervention(cpdag, k):
"""
generate a chordal random intervention of size up to k
by chordal random we just mean intervene on nodes adjacent to undirected edges
input:
matrix cpdag: the current cpdag
        int k: the max number of perturbations
output:
list: the nodes we will perturb
"""
#first extract all nodes with undirected edges next to them
#do this by taking the min of each row (if there is a -1 we will get -1)
#then select all the indices with a -1
min_val = np.amin(cpdag, axis=1)
    possible_nodes = np.flatnonzero(np.minimum(min_val, 0)) #the min makes everything -1 or 0
    #second randomly sample from the nodes we just extracted
    #even if we can't fill the budget (because too much is already identified),
    #this is fine
return np.random.choice(possible_nodes, size = min(k, np.size(possible_nodes)), replace = False)
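#Illustrative sketch (hypothetical helper, not part of the original module): on a
#fully undirected 3-node chain every node touches an undirected edge, so the call
#below returns 2 of the 3 nodes uniformly at random.
def _demo_chordal_random_intervention():
    cpdag = np.array([[ 0, -1,  0],
                      [-1,  0, -1],
                      [ 0, -1,  0]])
    return chordal_random_intervention(cpdag, k=2)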
def chordal_random_intervention_set(cpdag, n_b, k):
"""
generates a batch of chordal random interventions
input:
matrix cpdag: current cpdag
int n_b: interventions in the batch
int k: max number of perturbations per intervention
output:
list of lists of ints intervention_set
"""
intervention_set = []
for _ in range(n_b):
intervention = chordal_random_intervention(cpdag, k)
intervention_set.append(intervention)
return intervention_set
def meek(cpdag, new_edges=None, skip_r3=False, is_tree=False):
"""
applies rules R1 to R4
input:
matrix cpdag
        list new_edges: the list of newly oriented edges used to seed the meek rules. None means use all directed edges
        bool skip_r3: if True, R3 is not considered. This can be used when working with interventions, since R3 is only
            triggered by colliders, and interventions don't uncover new colliders
        bool is_tree: if the input MEC is a tree, only R1 is needed
    output:
        matrix cpdag
"""
#keep going til there is a round where you find no edge
edges_found = True
latest_edges = new_edges
    if new_edges is None:
latest_edges = extract_all_directed(cpdag)
while edges_found:
edges_found = False
latest_edges_temp = []
        #for R1 just need to look at recently oriented edges (u, v) and orient all undirected
        #(v, d) where there is no edge between d and u
for edge in latest_edges:
v = edge[1]
u = edge[0]
possible_orients = extract_undirected(cpdag, v)
for d in possible_orients:
#only orient if not yet oriented
if cpdag[v][d] == -1:
#no edge to complete the triangle
if cpdag[d][u] == 0 and cpdag[u][d] == 0 and u!=d:
cpdag[v][d] = 1
cpdag[d][v] = 0
edges_found = True
if((v,d)) not in latest_edges_temp:
latest_edges_temp.append((v, d))
#for R2 look at recently oriented edges (u, v), check if any in a cycle pattern,
#orient
if not is_tree:
for edge in latest_edges:
v = edge[1]
u = edge[0]
#now check all edges going into u
nodes_into = np.flatnonzero(np.maximum(cpdag[:, u], 0))
for w in nodes_into:
#if there exists w-v, orient it as w->v
if cpdag[w, v] == -1:
cpdag[w, v] = 1
cpdag[v, w] = 0
edges_found = True
if((w,v)) not in latest_edges_temp:
latest_edges_temp.append((w, v))
#now check all edges going out of v
nodes_out = np.flatnonzero(np.maximum(cpdag[v], 0))
for w in nodes_out:
#if there exists w-u, orient it as u->w
if cpdag[w, u] == -1:
cpdag[w, u] = 0
cpdag[u, w] = 1
edges_found = True
if((u,w)) not in latest_edges_temp:
latest_edges_temp.append((u, w))
#for R3 take any recently learnt edges and try to fit the pattern around it
if not skip_r3:
for edge in latest_edges:
v = edge[1]
u = edge[0]
#u is bottom left corner, v bottom right corner
#go over all colliders with it
nodes_collide_from = np.flatnonzero(np.maximum(cpdag[:, v], 0))
                #w is top right corner
for w in nodes_collide_from:
if w == u: #ignore the edge we already have
continue
#go over all possible diagonals
#t is top left node
for t in np.flatnonzero(np.minimum(cpdag[:, u], 0)):
#don't consider nodes already in the pattern
if t in [u, w, v]:
continue
#check if we get two triangles with new edges undirected
                        #also ensure the diagonal itself is undirected
if (cpdag[t, u] == -1) and (cpdag[t, w] == -1) and (cpdag[t, v]==-1) and (cpdag[u, w] == 0) and (cpdag[w, u] == 0):
cpdag[t, v] = 1 #direct the diagonal
cpdag[v, t] = 0
edges_found = True
if((t,v)) not in latest_edges_temp:
latest_edges_temp.append((t, v))
#for R4 go over the case of detecting each directed edge separately
#left side directed considered first
for edge in latest_edges:
v = edge[1]
u = edge[0]
#find the bottom directed edge that starts at v
nodes_out = np.flatnonzero(np.maximum(cpdag[v], 0))
if len(nodes_out) == 0:
continue
poss_diags_in = np.flatnonzero(np.maximum(cpdag[:, v], 0)).tolist()
poss_diags_out = np.flatnonzero(np.maximum(cpdag[v], 0)).tolist()
poss_diags_undirected = np.flatnonzero(np.minimum(cpdag[:, v], 0)).tolist()
for w in nodes_out:
                #now find all diagonals (excluding already-identified edges)
#t is top right node
for t in (poss_diags_in + poss_diags_out + poss_diags_undirected):
if t in [u, w]:
continue
#check if the two undirected edges exist then orient the right one
if (cpdag[t, u] == -1) and (cpdag[t, w] == -1) and (cpdag[u, w] ==0) and (cpdag[w,u] == 0):
cpdag[t, w] = 1
cpdag[w, t] = 0
edges_found = True
if((t,w)) not in latest_edges_temp:
latest_edges_temp.append((t, w))
        #now consider the case where the bottom edge is the directed one
for edge in latest_edges:
v = edge[0]
w = edge[1] #keep same names as for the code above
nodes_in = np.flatnonzero(np.maximum(cpdag[:, v], 0))
if len(nodes_in) == 0:
continue
poss_diags_in = np.flatnonzero(np.maximum(cpdag[:, v], 0))
poss_diags_out = np.flatnonzero(np.maximum(cpdag[v], 0))
poss_diags_undirected = np.flatnonzero(np.minimum(cpdag[:, v], 0))
for u in nodes_in:
                #everything else is the same as above:
                #find all diagonals (excluding already-identified edges)
                #t is top right node
for t in (poss_diags_in.tolist() + poss_diags_out.tolist() + poss_diags_undirected.tolist()):
if t in [u, w]:
continue
#check if the two undirected edges exist then orient the right one
if (cpdag[t, u] == -1) and (cpdag[t, w] == -1) and (cpdag[u,w] == 0) and (cpdag[w,u] == 0):
cpdag[t, w] = 1
cpdag[w, t] = 0
edges_found = True
if((t,w)) not in latest_edges_temp:
latest_edges_temp.append((t, w))
latest_edges = latest_edges_temp
return cpdag
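#Illustrative sketch (hypothetical helper, not called elsewhere): on the path
#0 -> 1 - 2, rule R1 orients 1 - 2 as 1 -> 2, since 0 and 2 are non-adjacent and
#orienting 2 -> 1 would create a new unshielded collider.
def _demo_meek_r1():
    cpdag = np.array([[0,  1,  0],
                      [0,  0, -1],
                      [0, -1,  0]])
    out = meek(cpdag.copy(), new_edges=[(0, 1)])
    #expected: out[1, 2] == 1 and out[2, 1] == 0
    return out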
def orient_from_intervention(dag, cpdag, intervention_set, is_tree=False):
"""
uses the meek rules to update a cpdag given infinite sample interventional data
from a single intervention. currently no implementation of soft interventions.
input:
matrix dag: the true dag
matrix cpdag: the current cpdag
list of lists of ints intervention_set: the nodes intervened on
output:
matrix: the cpdag after the intervention
"""
n = dag.shape[0]
new_edges = []
for intervention in intervention_set:
#first, orient all the edges we get from R0
for v in intervention:
for i in range(0, n):
if i not in intervention:
                    #if it's not an orientable edge, move on
if cpdag[v][i] != -1:
continue
cpdag[v][i] = dag[v][i]
cpdag[i][v] = dag[i][v]
if dag[v][i] == 1:
new_edges.append((v, i))
if dag[i][v] == 1:
new_edges.append((i, v))
#second, extend the cpdag to a maximally oriented cpdag using the meek rules
cpdag = meek(cpdag,new_edges, skip_r3 = False, is_tree=is_tree)
return cpdag
def cpdag_from_dag_observational(dag):
"""
given the true dag, returns the pcdag identified with just observational data
input:
matrix dag: the true dag
output:
matrix: the maximally oriented cpdag given just observational data
"""
n = dag.shape[0]
    #first get the skeleton of the dag (all edges start undirected)
    cpdag = -dag - dag.T
#second we identify all unshielded colliders in the true dag
#for all nodes
for i in range(0,n):
#iterate over pairs of parents j,k
parents_i = np.flatnonzero(dag[:,i])
for j,k in itertools.combinations(parents_i, r=2):
            #if the parents are not adjacent we learn j->i and k->i
            #(check adjacency in the true dag, since cpdag entries may already have been overwritten)
            if dag[j][k] == 0 and dag[k][j] == 0:
cpdag[i][j] = 0
cpdag[i][k] = 0
cpdag[j][i] = 1
cpdag[k][i] = 1
#third we apply meek rules which are efficient in the observational setting
cpdag = meek(cpdag)
return cpdag
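#Illustrative sketch (hypothetical helper): the unshielded collider 0 -> 2 <- 1 is
#fully identified from observational data alone, so its CPDAG equals the DAG.
def _demo_observational_cpdag():
    dag = np.zeros((3, 3))
    dag[0, 2] = 1
    dag[1, 2] = 1
    out = cpdag_from_dag_observational(dag)
    #expected: np.array_equal(out, dag) is True
    return out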
def cpdag_obj_val(cpdag, edgeorient=True):
"""
computes the obj value of the cpdag. If edgeorient=True use the edge orienting objective
input:
matrix cpdag
bool edgorient: whether to use the edgeorienting objective
output:
int: the objective value
"""
return len(extract_all_directed(cpdag))
def orient_tree_root_v(cpdag, v):
"""
    orients a cpdag completely given the root. the output is only valid for tree MECs; otherwise it is meaningless
input:
matrix cpdag
int v: vertex to be the root
output:
matrix: a dag
"""
n = cpdag.shape[0]
#until we have oriented everything just go through applying R1
oriented = [v]
while oriented != []:
u = oriented.pop()
for i in range(0, n):
if cpdag[u][i] == -1:
cpdag[u][i] = 1
cpdag[i][u] = 0
oriented.append(i)
return cpdag
def objective_given_intervention(cpdag, intervention_set, ref_cpdag, n_samples = 1, max_score=1, is_tree=False):
"""
compute the objective value of a specific intervention
input:
matrix: cpdag
list of lists of ints intervention_set
        matrix ref_cpdag: the cpdag used in the obj to count the number of newly oriented edges
        int n_samples: number of samples if mode is "sample"
output:
int: the objective value of that intervention
"""
#sum over all dags in the equivalence class
n = cpdag.shape[0]
obj = 0
if mode == "sample":
for dag in mec_size.uniform_sample_dag_plural(cpdag, n_samples):
cpdag2 = orient_from_intervention(dag, cpdag.copy(), intervention_set, is_tree=is_tree)
obj += (cpdag_obj_val(cpdag2) - cpdag_obj_val(ref_cpdag))/n_samples
return obj
return obj/max_score
def objective_given_dags_interventions(cpdag, intervention_set, ref_cpdag, dag_list, is_tree=False):
"""
objective_given_intervention but takes in a fixed list of dags to eval on
"""
obj = 0
for dag in dag_list:
cpdag2 = orient_from_intervention(dag, cpdag.copy(), intervention_set, is_tree=is_tree)
obj += (cpdag_obj_val(cpdag2) - cpdag_obj_val(ref_cpdag))/len(dag_list)
return obj
def gen_stochastic_grad_fun(cpdag, ref_cpdag, num_sample=1, exact=True, total_x = 1, is_tree=False):
#generates the gradient function for a bag of cpdags
#uses the edge orienting obj
n = cpdag.shape[0]
def stochastic_grad(intervention_set, x):
"""
sample one dag in the sum, compute the stochastic gradient as in karimi et al 2018
(for the multilinear extension)
input:
matrix: cpdag
list of lists of ints intervention_set- existing interventions
matrix ref_cpdag: the cpdag used in the obj to count the number of oriented edges
array of floats x: the point we are taking the gradient at
int num_sample: the number of samples used to approximate the objective
            bool exact: True to use exact uniform sampling, False to use the fast inexact method
int total_x: for each dag, how many different interventions do we try
output:
            array of floats: the estimated gradient of the objective
"""
grad_f = np.zeros(n)
#sample the intervention given x
dags = mec_size.uniform_sample_dag_plural(cpdag, num_sample, exact=exact)
for dag in dags:
computed_val = {}
#do runs for multiple different samples of x
for _ in range(total_x):
x_rand = np.random.binomial(1, p = x)
for v in range(0, n):
x_rand_upper = x_rand.copy()
x_rand_upper[v] = 1
x_rand_lower = x_rand.copy()
x_rand_lower[v] = 0
                    #tobytes gives a hashable key so we can memoize each numpy array's score
if x_rand_upper.tobytes() not in computed_val:
cpdag_upper = orient_from_intervention(dag, cpdag.copy(), intervention_set+[np.flatnonzero(x_rand_upper).tolist()], is_tree=is_tree)
cpdag_upper_score = cpdag_obj_val(cpdag_upper)
computed_val[x_rand_upper.tobytes()] = cpdag_upper_score
else:
cpdag_upper_score = computed_val[x_rand_upper.tobytes()]
if x_rand_lower.tobytes() not in computed_val:
cpdag_lower = orient_from_intervention(dag, cpdag.copy(), intervention_set+[np.flatnonzero(x_rand_lower).tolist()], is_tree=is_tree)
cpdag_lower_score = cpdag_obj_val(cpdag_lower)
computed_val[x_rand_lower.tobytes()] = cpdag_lower_score
else:
cpdag_lower_score = computed_val[x_rand_lower.tobytes()]
grad_f[v] += (cpdag_upper_score - cpdag_lower_score)/ (num_sample*total_x)
return grad_f
return stochastic_grad
def gen_ss_stochastic_grad_fun(cpdag, ref_cpdag, num_sample=1, exact=True, total_x = 1, is_tree=False):
#Same as gen_stochastic_grad_fun but sep system is the groundset and we add interventions
#not perturbations. Uses the edge orienting objective
n = cpdag.shape[0]
def stochastic_grad(x, ss):
"""
sample one dag in the sum, compute the stochastic gradient as in karimi et al 2018
(for the multilinear extension)
input:
matrix: cpdag
matrix ref_cpdag: the cpdag used in the obj to count the number of oriented edges
array of floats x: the point we are taking the gradient at
int num_sample: the number of samples used to approximate the objective
            bool exact: True to use exact uniform sampling, False to use the fast inexact method
int total_x: for each dag, how many different interventions do we try
output:
            array of floats: the estimated gradient of the objective
"""
n_ss = len(ss)
grad_f = np.zeros(n_ss)
#sample the intervention given x
dags = mec_size.uniform_sample_dag_plural(cpdag, num_sample, exact=exact)
for dag in dags:
computed_val = {}
#do runs for multiple different samples of x
for _ in range(total_x):
x_rand = np.random.binomial(1, p = x)
for v in range(0, n_ss):
x_rand_upper = x_rand.copy()
x_rand_upper[v] = 1
x_rand_lower = x_rand.copy()
x_rand_lower[v] = 0
#tobytes allows us to store the numpy array in a hashable way
if x_rand_upper.tobytes() not in computed_val:
indices = np.flatnonzero(x_rand_upper).tolist()
interventions = [ss[i] for i in indices]
cpdag_upper = orient_from_intervention(dag, cpdag.copy(), interventions, is_tree=is_tree)
cpdag_upper_score = cpdag_obj_val(cpdag_upper)
computed_val[x_rand_upper.tobytes()] = cpdag_upper_score
else:
cpdag_upper_score = computed_val[x_rand_upper.tobytes()]
if x_rand_lower.tobytes() not in computed_val:
indices = np.flatnonzero(x_rand_lower).tolist()
interventions = [ss[i] for i in indices]
cpdag_lower = orient_from_intervention(dag, cpdag.copy(), interventions, is_tree=is_tree)
cpdag_lower_score = cpdag_obj_val(cpdag_lower)
computed_val[x_rand_lower.tobytes()] = cpdag_lower_score
else:
cpdag_lower_score = computed_val[x_rand_lower.tobytes()]
grad_f[v] += (cpdag_upper_score - cpdag_lower_score)/ (num_sample*total_x)
return grad_f
return stochastic_grad
def pipage(x, k):
"""
    perform pipage rounding for a nonmonotone submodular function, following
    "Maximizing a Monotone Submodular Function subject to a Matroid Constraint".
    pipage rounding is performed until at most one non-integer value remains;
    we then randomly round the last value by sampling a Bernoulli
input:
float array x: the solution to be rounded
int k: constraint on perturbation size
output:
int list: the intervention
"""
x = np.round(x, decimals=10) #round to 10 decimals for numerical stability
    #now collect the noninteger entries in T (the notation of the paper above)
T = np.flatnonzero((np.round(x, decimals=0) - x)).tolist()
epsilon = 0.001 #rounding threshold for numerical stability
while len(T) > 1:
ij = np.random.choice(T, 2, replace=False)
i = ij[0]
j = ij[1]
#option 1: x_i is the one where we make the i direction large
dis_to_boundary_i = np.minimum(1-x[i], x[j])
x_i = x.copy()
x_i[i] += dis_to_boundary_i
x_i[j] -= dis_to_boundary_i
#same for j
dis_to_boundary_j = np.minimum(1-x[j], x[i])
x_j = x.copy()
x_j[i] -= dis_to_boundary_j
x_j[j] += dis_to_boundary_j
#sample the direction based on the sampling rule
p = abs(dis_to_boundary_i / (dis_to_boundary_j + dis_to_boundary_i))
if p>1+epsilon:
print(x)
print(p)
print(x_i)
print(x_j)
assert p < 1 + epsilon #check that we don't have funky probabilities
if p < 1+ epsilon and p > 1:
p = 1
if p > - epsilon and p < 0:
p = 0
change_lower = np.random.binomial(1, p, 1)[0]
if change_lower:
x = x_j
else:
x = x_i
#change T if we get to some integer solutions
if abs(x[i] - 1) < epsilon or abs(x[i]) < epsilon:
x[i] = round(x[i])
T.remove(i)
if abs(x[j] - 1) < epsilon or abs(x[j]) < epsilon:
x[j] = round(x[j])
T.remove(j)
#if one element left, bernoulli sample its value. can do since submod->concave along
#any nonnegative direction vector
if len(T) == 1:
#first just set to 0 if fulfilled condition and have rounding error
if np.sum(x) > k and np.sum(x) < k+0.1:
x[T[0]] = 0
else:
#if no interventions yet just do the intervention for sure
if(np.sum(x) <= 1):
x[T[0]] = 1
#otherwise sample whether to include
else:
x[T[0]] = np.random.binomial(1, x[T[0]], 1)[0]
    #If we get [0.9, 0, 0] we could end up intervening on nothing. The fix used is to
    #intervene on the last remaining variable for sure; that's handled in the if above
return x
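#Illustrative sketch (hypothetical helper): pipage rounding a fractional point with
#one unit of total mass yields a 0/1 vector supported on a single entry.
def _demo_pipage():
    x = np.array([0.5, 0.5, 0.0])
    out = pipage(x.copy(), k=1)
    #expected: out is [1, 0, 0] or [0, 1, 0], each with probability 1/2
    return out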
def gen_hess_fun(cpdag, ref_cpdag, num_sample=1, exact=True, total_x = 1, is_tree=False):
"""
generates the hessian function of the edge orienting obj given a bag of cpdags
num_sample: number of times we sample a dag
total_x: number of repeats per sampled dag
"""
n = cpdag.shape[0]
def hess_fun(intervention_set, x, e):
"""
        estimates the hessian of the multilinear extension (used in the scdpp updates)
"""
#sample the intervention given x
dags = mec_size.uniform_sample_dag_plural(cpdag, num_sample, exact=exact)
hess = np.zeros((n, n))
for dag in dags:
computed_val = {}
#do runs for multiple different samples of x
for _ in range(total_x):
S = []
for s in range(n):
if e[s] < x[s]:
S.append(s)
for i in range(n):
for j in range(i, n):
if i == j:
continue
S_ij = list({i, j}.union(set(S)))
S_i = list({i}.union(set(S)) - {j})
S_j = list({j}.union(set(S)) - {i})
S_minus = list(set(S) - {i,j}) #the set with both indices removed
for S_mod in [S_ij, S_i, S_j, S_minus]:
if np.array(S_mod).tobytes() not in computed_val:
cpdag_new = orient_from_intervention(dag, cpdag.copy(), intervention_set+[S_mod], is_tree=is_tree)
computed_val[np.array(S_mod).tobytes()] = cpdag_obj_val(cpdag_new)
hess[i,j] += (computed_val[np.array(S_ij).tobytes()]-computed_val[np.array(S_i).tobytes()]-
computed_val[np.array(S_j).tobytes()]+computed_val[np.array(S_minus).tobytes()])/ (num_sample*total_x)
return hess
return hess_fun
def gen_ss_hess_fun(cpdag, ref_cpdag, num_sample=1, exact=True, total_x = 1, is_tree=False):
"""
hessian for SS-based approach
"""
n = cpdag.shape[0]
def hess_fun(x, e, ss):
"""
        estimates the hessian of the multilinear extension (used in the scdpp_ss updates)
"""
n_ss=len(ss)
#sample the intervention given x
dags = mec_size.uniform_sample_dag_plural(cpdag, num_sample, exact=exact)
hess = np.zeros((n_ss, n_ss))
for dag in dags:
computed_val = {}
#do runs for multiple different samples of x
for _ in range(total_x):
S = []
for s in range(n_ss):
if e[s] < x[s]:
S.append(ss[s])
for i in range(n_ss):
for j in range(i, n_ss):
if i == j:
continue
S_ij = list({tuple(a) for a in ([ss[i], ss[j]] + S)})
S_i = list({tuple(a) for a in ([ss[i]] + S)} - {tuple(ss[j])})
S_j = list({tuple(a) for a in ([ss[j]] + S)} - {tuple(ss[i])})
S_minus = list({tuple(a) for a in S} - {tuple(ss[j]), tuple(ss[i])}) #the set with both indices removed
for S_mod in [S_ij, S_i, S_j, S_minus]:
if np.array(S_mod).tobytes() not in computed_val:
cpdag_new = orient_from_intervention(dag, cpdag.copy(), S_mod, is_tree=is_tree)
computed_val[np.array(S_mod).tobytes()] = cpdag_obj_val(cpdag_new)
hess[i,j] += (computed_val[np.array(S_ij).tobytes()]-computed_val[np.array(S_i).tobytes()]-
computed_val[np.array(S_j).tobytes()]+computed_val[np.array(S_minus).tobytes()])/ (num_sample*total_x)
return hess
return hess_fun
def scdpp(cpdag, n_b, k, obj, stochastic_grad, hess_fun, T=100, max_score=1, M0 = 10, M=10, num_dag_sample=10, is_tree=False):
"""
    The improved version of stochastic continuous greedy by Hassani et al 2020
uses hessian approximation instead of just gradient information
M0 and M are minibatch sizes
"""
intervention_set = []
n = cpdag.shape[0]
u_bar = 1 #the upper bound for each variable in x_t
A = np.vstack((np.ones(n), np.eye(n))) #constraint matrix for linear program
    #gen_stochastic_grad_fun needs total_x to be set to M
for _ in range(n_b):
#first lets do t=1
x_t = np.zeros(n)
x_tp = np.zeros(n) #previous x
grad_f = np.zeros(n)
for t in range(1, T):
if t == 1:
#sample minibatch and compute gradient g^0
for i in range(M0):
grad_f += stochastic_grad(intervention_set, x_t) / M0
else:
hess = np.zeros((n,n))
for i in range(M):
a = np.random.uniform()
e = np.random.uniform(size=n)
x_a = a * x_t + (1-a) * x_tp
hess += hess_fun(intervention_set, x_a, e) / M
delta = hess.dot(x_t - x_tp)
grad_f = grad_f + delta
#compute ascent direction
b = np.insert(u_bar-x_t, 0, k) #constraint vector
v_t = scipy.optimize.linprog(-grad_f, A_ub = A, b_ub = b, bounds = (0,1))
x_tp = x_t
x_t = x_t + v_t.x / T
            if np.sum(x_t) > k + 0.1:
                raise Exception("x_t exceeds the perturbation budget")
#now do pipage rounding a few times and pick the best
best_x = pipage(x_t, k)
best_score = obj(intervention_set+[np.flatnonzero(best_x).tolist()])
#do ten runs of pipage rounding and pick the best
for _ in range(10):
x = pipage(x_t, k)
x_score = obj(intervention_set+[np.flatnonzero(x).tolist()])
if x_score > best_score:
best_x = x
best_score = x_score
intervention_set.append(np.flatnonzero(best_x).tolist())
return intervention_set
def scdpp_ss(cpdag, n_b, k, obj, stochastic_grad, hess_fun, T=100, max_score=1, M0 = 10, M=10, num_dag_sample=10, is_tree=False, all_k = True, smart_ss=True):
"""
    The improved version of monotone stochastic continuous greedy by Hassani et al 2020.
    M0 and M are minibatch sizes.
    Uses a separating system to define the perturbations, so the continuous approach
    only selects interventions from the separating system.
"""
n = cpdag.shape[0]
if smart_ss:
ss = smart_ss_construct(cpdag, k)
else:
#will only do it for the k given
ss = ss_construct(n, k)
n_ss = len(ss)
    #if we can completely identify using the separating system, do so
if n_ss <= n_b:
return ss
A = np.ones((1, n_ss)) #constraint matrix for linear program
    #gen_ss_stochastic_grad_fun needs total_x to be set to M
#first lets do t=1
x_t = np.zeros(n_ss)
x_tp = np.zeros(n_ss) #previous x
grad_f = np.zeros(n_ss)
for t in range(1, T):
if t == 1:
#sample minibatch and compute gradient g^0
for i in range(M0):
grad_f += stochastic_grad(x_t, ss) / M0
else:
hess = np.zeros((n_ss,n_ss))
for i in range(M):
a = np.random.uniform()
e = np.random.uniform(size=n_ss)
x_a = a * x_t + (1-a) * x_tp
hess += hess_fun(x_a, e, ss) / M
delta = hess.dot(x_t - x_tp)
grad_f = grad_f + delta
#compute ascent direction
v_t = scipy.optimize.linprog(-grad_f, A_ub = A, b_ub = np.asarray([n_b]), bounds = (0,1))
x_tp = x_t
x_t = x_t + v_t.x / T
        #check x doesn't break the constraint
        if np.sum(x_t) > n_b + 0.1:
            raise Exception("x_t exceeds the intervention budget")
#resolve any slight numerical issues by ensuring the constraints are met
x_t = np.minimum(x_t / np.linalg.norm(x_t, ord=1) * n_b, 1)
#now do pipage rounding a few times and pick the best
best_x = pipage(x_t, n_b)
indices = np.flatnonzero(best_x).tolist()
interventions = [ss[i] for i in indices]
best_score = obj(interventions)
#do ten runs of pipage rounding and pick the best
for _ in range(10):
x = pipage(x_t, n_b)
indices = np.flatnonzero(x).tolist()
interventions = [ss[i] for i in indices]
x_score = obj(interventions)
if x_score > best_score:
best_x = x
best_score = x_score
indices = np.flatnonzero(best_x).tolist()
interventions = [ss[i] for i in indices]
return interventions
def edge_obj_sample(cpdags, ws, num_samples, obj=None, is_tree=False):
"""
    takes an obj that uses a list of dags and returns that obj with a fixed dag list.
    cpdags is a list of MECs, ws their weights. We sample uniformly from the possible DAGs.
    If obj == None, assume the edge orienting objective
"""
if obj in [objective_given_dags_interventions, None]:
num_cpdags = len(cpdags)
dag_list = []
cpdag_list = []
for i in range(num_samples):
cpdag = cpdags[np.random.choice(num_cpdags, p=ws)]
dag = mec_size.uniform_sample_dag_plural(cpdag, 1)[0]
cpdag_list.append(cpdag)
dag_list.append(dag)
def new_obj(epsilon):
out = 0
for i in range(num_samples):
out += objective_given_dags_interventions(cpdag_list[i], epsilon, cpdag_list[i].copy(), [dag_list[i]], is_tree=is_tree) / num_samples
return out
return new_obj
return
def weighted_dags_edge_obj_sample(cpdags, ws, dags, obj = None, total_x=1, is_tree=False):
"""
Just uses a list of given bootstrapped dags and corresponding cpdags and weights to
construct an objective that uses the same dist over dags as the finite sample obj
    also returns a stochastic gradient function.
    total_x is the number of samples of the intervention drawn from
the categorical dist given by x_t when computing the stochastic grad
"""
n = cpdags[0].shape[0]
num_samples = len(cpdags)
def new_obj(epsilon):
out = 0
for i in range(num_samples):
out += objective_given_dags_interventions(cpdags[i], epsilon, cpdags[i].copy(), [dags[i]], is_tree=is_tree) * ws[i]
return out
def new_stochastic_grad(intervention_set, x):
"""
intervention set is existing interventions, x is the continuous numpy array
"""
grad_f = np.zeros(n)
#sample the intervention given x
        indexes = np.random.randint(len(dags), size=1)
for i in indexes:
dag = dags[i]
cpdag = cpdags[i]
computed_val = {}