py-why · adam2392 · Jan 13, 2023
diff --git a/.codespellignore b/.codespellignore
@@ -1,2 +1,3 @@
 raison
-wee
+wee
+ges
diff --git a/dodiscover/score/__init__.py b/dodiscover/score/__init__.py
@@ -0,0 +1 @@
+from .ges_alg import GES
diff --git a/dodiscover/score/ges_alg.py b/dodiscover/score/ges_alg.py
@@ -0,0 +1,43 @@
+from typing import Callable, Optional, Union
+
+import networkx as nx
+import pandas as pd
+from pywhy_graphs.array.export import clearn_arr_to_graph
+
+from dodiscover.context import Context
+
+
+# XXX: see https://github.com/juangamella/ges (another Python implementation of GES)
+class GES:  # wrapper around causal-learn's ``ges``; see ``fit`` for the attributes it sets
+    graph_: Optional[nx.DiGraph]  # None until ``fit`` runs; then the converted output graph
+    # Constructor arguments are stored unchanged and only consumed when ``fit`` is called.
+    def __init__(
+        self,
+        scoring_method: Union[Callable, str] = "bic",
+        max_indegree: int = None,  # NOTE(review): None default; Optional[int] is more accurate
+        **scoring_method_kwargs,  # extra keyword arguments forwarded verbatim to ``ges``
+    ) -> None:
+        self.scoring_method = scoring_method
+        self.max_indegree = max_indegree
+        self.scoring_method_kwargs = scoring_method_kwargs
+        # No graph is available until ``fit`` has been called.
+        self.graph_ = None
+
+    def fit(self, df: pd.DataFrame, ctx: Context):  # NOTE(review): ``ctx`` is unused below
+        from causallearn.search.ScoreBased.GES import ges  # imported locally, only when fitting
+        # Only the raw values go to ``ges``; the DataFrame's column labels are not passed along.
+        X = df.to_numpy()
+
+        # run causal-learn's GES search with the settings stored in __init__
+        ges_record = ges(
+            X, score_func=self.scoring_method, maxP=self.max_indegree, **self.scoring_method_kwargs
+        )
+        # The returned record is read by key: "G" for the graph object, "score" for the score.
+        causal_learn_graph = ges_record["G"]
+        names = [n.name for n in causal_learn_graph.nodes]  # labels as reported by causal-learn
+        adjmat = causal_learn_graph.graph  # presumably causal-learn's adjacency array; confirm
+        # Keep the raw causal-learn graph and the score next to the converted graph.
+        self.causal_learn_graph_ = causal_learn_graph
+        self.score_ = ges_record["score"]
+        self.graph_ = clearn_arr_to_graph(adjmat, arr_idx=names, graph_type="DiGraph")
+        return self  # the fitted instance itself is returned
diff --git a/tests/unit_tests/replearning/test_gin.py b/tests/unit_tests/replearning/test_gin.py
@@ -9,25 +9,26 @@
 
 
 def test_estimate_gin_testdata():
-    """Test the wrapper to the causal-learn GIN algorithm for estimating the causal DAG.
-    """
+    """Test the wrapper to the causal-learn GIN algorithm for estimating the causal DAG."""
 
     # Sim data
     np.random.seed(123)
     num_samples = 1000
     # First latent is a uniform
     latent_var_1 = np.random.uniform(0, 100, num_samples)
     # Second latent is caused by first via nonlinear transform
-    latent_var_2 = np.array(list(map(
-        lambda u: 100 * .03 * u / (1 + .03 * u),
-        latent_var_1)))
+    latent_var_2 = np.array(list(map(lambda u: 100 * 0.03 * u / (1 + 0.03 * u), latent_var_1)))
     # Observed variables. X1 and X2 are caused by L1, X3 and X4 are caused by L2
-    observed_vars = np.array([
-        latent_var_1 + np.random.normal(0, 1, num_samples),  # X1 caused by L1
-        (100 - latent_var_1) + np.random.normal(0, 1, num_samples),  # X2 caused by L1, mirros X1
-        latent_var_2 + np.random.normal(0, 1, num_samples),  # X3 caused by L2
-        (100 - latent_var_2) + np.random.normal(0, 1, num_samples),  # X4 caused by L2, mirrors X3
-    ]).transpose()
+    observed_vars = np.array(
+        [
+            latent_var_1 + np.random.normal(0, 1, num_samples),  # X1 caused by L1
+            (100 - latent_var_1)
+            + np.random.normal(0, 1, num_samples),  # X2 caused by L1, mirrors X1
+            latent_var_2 + np.random.normal(0, 1, num_samples),  # X3 caused by L2
+            (100 - latent_var_2)
+            + np.random.normal(0, 1, num_samples),  # X4 caused by L2, mirrors X3
+        ]
+    ).transpose()
     data = pd.DataFrame(observed_vars, columns=["X1", "X2", "X3", "X4"])
 
     g_answer = CPDAG(
@@ -36,9 +37,10 @@ def test_estimate_gin_testdata():
             ("L1", "X2"),
             ("L2", "X3"),
             ("L2", "X4"),
-        ], [
+        ],
+        [
             ("L1", "L2"),
-        ]
+        ],
     )
 
     context = make_context().variables(data=data).build()
-Original file line number
+Diff line change
@@ -1,2 +1,3 @@
     raison
-    wee
+    wee
+    ges