Skip to content

Commit

Permalink
Mehei/scoring (#44)
Browse files Browse the repository at this point in the history
* add scoring function
* add score example on dml notebook
  • Loading branch information
heimengqi authored May 3, 2019
1 parent 986f0e9 commit c09692c
Show file tree
Hide file tree
Showing 3 changed files with 194 additions and 39 deletions.
30 changes: 29 additions & 1 deletion econml/dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,34 @@ def effect(self, T0, T1, X):
T1 = self._one_hot_encoder.transform(reshape(self._label_encoder.transform(T1), (-1, 1)))[:, 1:]
return super().effect(T0, T1, X)

def score(self, Y, T, X=None, W=None):
    """
    Return the mean squared error of the final model on residualized data.

    The first-stage predictions of every fitted split are averaged, the
    outcome and treatment residuals are formed from those averages, and the
    outcome residuals are compared with the ones implied by the final
    model's effects applied to the treatment residuals.

    Parameters
    ----------
    Y : array
        Outcomes. A 1-dimensional input is reshaped to a single column.
    T : array
        Treatments. When the estimator uses discrete treatments they are
        label-encoded and one-hot encoded (first column dropped) before use;
        a 1-dimensional input is reshaped to a single column.
    X : array, optional
        Features. Defaults to a single column of ones with one row per
        row of `Y`.
    W : array, optional
        Controls. Defaults to an empty array with one row per row of `Y`
        and zero columns.

    Returns
    -------
    mse : float
        Mean, over all entries, of the squared difference between the
        outcome residuals and their predicted values.
    """
    discrete = self._discrete_treatment
    if discrete:
        labels = self._label_encoder.transform(T)
        T = self._one_hot_encoder.transform(reshape(labels, (-1, 1)))[:, 1:]

    # work with column-shaped outcomes and treatments throughout
    if T.ndim == 1:
        T = reshape(T, (-1, 1))
    if Y.ndim == 1:
        Y = reshape(Y, (-1, 1))

    y_shape = shape(Y)
    t_shape = shape(T)
    if X is None:
        X = np.ones((y_shape[0], 1))
    if W is None:
        W = np.empty((y_shape[0], 0))

    # one slice per split along the last axis, averaged afterwards
    n_splits = self._n_splits
    y_hat = np.zeros(y_shape + (n_splits,))
    t_hat = np.zeros(t_shape + (n_splits,))
    for fold in range(n_splits):
        t_fold = self._models_t[fold].predict(X, W)
        if discrete:
            # drop the first column to line up with the encoded T above
            t_fold = t_fold[:, 1:]
        t_hat[:, :, fold] = reshape(t_fold, t_shape)
        y_hat[:, :, fold] = reshape(self._models_y[fold].predict(X, W), y_shape)

    y_resid = Y - y_hat.mean(axis=2)
    t_resid = T - t_hat.mean(axis=2)

    # effects[i, j, k]: effect of treatment component k on outcome j for row i
    effects = reshape(self._model_final.predict(X), (-1, y_shape[1], t_shape[1]))
    y_resid_hat = reshape(np.einsum('ijk,ik->ij', effects, t_resid), y_shape)
    return ((y_resid - y_resid_hat) ** 2).mean()


class _DMLCateEstimatorBase(_RLearner):
"""
Expand Down Expand Up @@ -216,7 +244,7 @@ def fit(self, X, T_res, Y_res):

self._model.fit(cross_product(self._featurizer.fit_transform(X), T_res), Y_res)

def predict(self, X):
def predict(self, X, T_res=None):
# create an identity matrix of size d_t (or just a 1-element array if T was a vector)
# the nth row will allow us to compute the marginal effect of the nth component of treatment
eye = np.eye(self._d_t[0]) if self._d_t else np.array([1])
Expand Down
2 changes: 2 additions & 0 deletions econml/tests/test_dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def test_cate_api(self):
# just make sure we can call the marginal_effect, effect and score methods
est.marginal_effect(None, X)
est.effect(0, T, X)
est.score(Y, T, X, W)

def test_can_use_vectors(self):
"""Test that we can pass vectors for T and Y (not only 2-dimensional arrays)."""
Expand All @@ -68,6 +69,7 @@ def test_discrete_treatments(self):
np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]),
np.ones((9, 1))),
[0, 2, 1, -2, 0, -1, -1, 1, 0])
dml.score(np.array([2, 3, 1, 3, 2, 1, 1, 1]), np.array([3, 2, 1, 2, 3, 1, 1, 1]), np.ones((8, 1)))

@staticmethod
def _generate_recoverable_errors(a_X, X, a_W=None, W=None, featurizer=FunctionTransformer()):
Expand Down
201 changes: 163 additions & 38 deletions notebooks/Double Machine Learning Examples.ipynb

Large diffs are not rendered by default.

0 comments on commit c09692c

Please sign in to comment.