Mehei/scoring (#44)

* add scoring function
* add score example on dml notebook
py-why · May 3, 2019 · c09692c · c09692c
1 parent 986f0e9
commit c09692c
Show file tree

Hide file tree

Showing 3 changed files with 194 additions and 39 deletions.
diff --git a/econml/dml.py b/econml/dml.py
@@ -134,6 +134,34 @@ def effect(self, T0, T1, X):
             T1 = self._one_hot_encoder.transform(reshape(self._label_encoder.transform(T1), (-1, 1)))[:, 1:]
         return super().effect(T0, T1, X)
 
+    def score(self, Y, T, X=None, W=None):
+        """
+        Compute the mean squared error of the final model on residualized data.
+
+        The first-stage predictions of ``Y`` and ``T`` are averaged over the
+        ``self._n_splits`` stored model pairs and subtracted from the inputs to
+        form residuals.  The ``Y`` residuals are then compared with the
+        ``T`` residuals multiplied by the effects predicted by
+        ``self._model_final``.
+
+        Parameters
+        ----------
+        Y : array
+            Outcomes; a 1-dimensional array is reshaped to a single column.
+        T : array
+            Treatments; a 1-dimensional array is reshaped to a single column.
+            When ``self._discrete_treatment`` is set, ``T`` is first label-encoded
+            and one-hot encoded, with the first encoded column dropped.
+        X : array, optional
+            Features passed to the first-stage models and to the final model.
+            Defaults to a single column of ones with one row per row of ``Y``.
+        W : array, optional
+            Controls passed to the first-stage models only.
+            Defaults to an array with one row per row of ``Y`` and zero columns.
+
+        Returns
+        -------
+        mse : scalar
+            Mean, over all samples and outcome columns, of the squared difference
+            between the ``Y`` residuals and their predicted values.
+        """
+        if self._discrete_treatment:
+            # same encoding as in `effect`: the first one-hot column is dropped
+            T = self._one_hot_encoder.transform(reshape(self._label_encoder.transform(T), (-1, 1)))[:, 1:]
+        # promote vector inputs to single-column matrices so all later indexing is 2-dimensional
+        if T.ndim == 1:
+            T = reshape(T, (-1, 1))
+        if Y.ndim == 1:
+            Y = reshape(Y, (-1, 1))
+        if X is None:
+            X = np.ones((shape(Y)[0], 1))
+        if W is None:
+            W = np.empty((shape(Y)[0], 0))
+        # one slice along the last axis per stored first-stage model
+        # NOTE(review): presumably these are the per-fold models fitted during `fit` - confirm
+        Y_test_pred = np.zeros(shape(Y) + (self._n_splits,))
+        T_test_pred = np.zeros(shape(T) + (self._n_splits,))
+        for ind in range(self._n_splits):
+            if self._discrete_treatment:
+                # drop the first predicted column so the prediction lines up with the encoded T
+                T_test_pred[:, :, ind] = reshape(self._models_t[ind].predict(X, W)[:, 1:], shape(T))
+            else:
+                T_test_pred[:, :, ind] = reshape(self._models_t[ind].predict(X, W), shape(T))
+            Y_test_pred[:, :, ind] = reshape(self._models_y[ind].predict(X, W), shape(Y))
+        # average the first-stage predictions across the stored models
+        Y_test_pred = Y_test_pred.mean(axis=2)
+        T_test_pred = T_test_pred.mean(axis=2)
+        Y_test_res = Y - Y_test_pred
+        T_test_res = T - T_test_pred
+        # final-model output reshaped to (rows, outcome columns, treatment columns)
+        effects = reshape(self._model_final.predict(X), (-1, shape(Y)[1], shape(T)[1]))
+        # per row: multiply the effect matrix by that row's treatment residual
+        Y_test_res_pred = reshape(np.einsum('ijk,ik->ij', effects, T_test_res), shape(Y))
+        mse = ((Y_test_res - Y_test_res_pred)**2).mean()
+        return mse
+
 
 class _DMLCateEstimatorBase(_RLearner):
     """
@@ -216,7 +244,7 @@ def fit(self, X, T_res, Y_res):
 
                 self._model.fit(cross_product(self._featurizer.fit_transform(X), T_res), Y_res)
 
-            def predict(self, X):
+            def predict(self, X, T_res=None):
                 # create an identity matrix of size d_t (or just a 1-element array if T was a vector)
                 # the nth row will allow us to compute the marginal effect of the nth component of treatment
                 eye = np.eye(self._d_t[0]) if self._d_t else np.array([1])

diff --git a/econml/tests/test_dml.py b/econml/tests/test_dml.py
@@ -45,6 +45,7 @@ def test_cate_api(self):
                                 # just make sure we can call the marginal_effect and effect methods
                                 est.marginal_effect(None, X)
                                 est.effect(0, T, X)
+                                est.score(Y, T, X, W)
 
     def test_can_use_vectors(self):
         """Test that we can pass vectors for T and Y (not only 2-dimensional arrays)."""
@@ -68,6 +69,7 @@ def test_discrete_treatments(self):
                                                   np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]),
                                                   np.ones((9, 1))),
                                        [0, 2, 1, -2, 0, -1, -1, 1, 0])
+        dml.score(np.array([2, 3, 1, 3, 2, 1, 1, 1]), np.array([3, 2, 1, 2, 3, 1, 1, 1]), np.ones((8, 1)))
 
     @staticmethod
     def _generate_recoverable_errors(a_X, X, a_W=None, W=None, featurizer=FunctionTransformer()):

diff --git a/notebooks/Double Machine Learning Examples.ipynb b/notebooks/Double Machine Learning Examples.ipynb