Merge pull request #42 from sdfordham/fix-penalized-synth
Fix penalized synth
sdfordham authored Feb 3, 2024
2 parents 982dd1b + fe71ea3 commit fb07cbd
Showing 5 changed files with 218 additions and 65 deletions.
README.md: 2 changes (1 addition, 1 deletion)
@@ -7,7 +7,7 @@ A python module for the synthetic control method that provides implementations of
 - Synthetic Control Method (Abadie & Gardeazabal 2003)
 - Robust Synthetic Control Method (Amjad, Shah & Shen 2018)
 - Augmented Synthetic Control Method (Ben-Michael, Feller & Rothstein 2021)
-- ~~Penalized Synthetic Control Method (Abadie & L'Hour 2021)~~ (the implementation of this method is flawed, and in the process of being corrected.)
+- Penalized Synthetic Control Method (Abadie & L'Hour 2021)
 
 The package also provides methods for performing placebo tests with the above.

examples/penalized/basque_penalized.ipynb: 118 changes (83 additions, 35 deletions)

Large diffs are not rendered by default.

pysyncon/penalized.py: 110 changes (85 additions, 25 deletions)
@@ -1,7 +1,8 @@
 from __future__ import annotations
-from typing import Optional
+from typing import Optional, Literal, Union
 
 import numpy as np
+import pandas as pd
 from scipy.optimize import minimize, Bounds, LinearConstraint
 
 from .dataprep import Dataprep
@@ -11,24 +12,33 @@
 class PenalizedOptimMixin:
     @staticmethod
     def w_optimize(
+        V_mat: np.ndarray,
         X0: np.ndarray,
         X1: np.ndarray,
         lambda_: float,
         initial: Optional[np.ndarray] = None,
+        qp_method: Literal["SLSQP"] = "SLSQP",
+        qp_options: dict = {"maxiter": 1000},
     ) -> tuple[np.ndarray, float]:
         """Solves the weight optimisation problem in the penalized setting.
 
         Parameters
         ----------
-        X0 : numpy.ndarray, shape (m, c)
+        V_mat : numpy.ndarray, shape (c, c)
+            The V matrix (using the notation of the Abadie, Diamond &
+            Hainmueller paper; this matrix is denoted by Γ in the Abadie and
+            L'Hour paper).
+        X0 : numpy.ndarray, shape (c, m)
             Matrix with each column corresponding to a control unit and each
             row to a covariate.
-        X1 : numpy.ndarray, shape (m,)
+        X1 : numpy.ndarray, shape (c,)
             Column vector giving the covariate values for the treated unit.
         lambda_ : float
             Regularization parameter.
         initial : numpy.ndarray, shape (m,), optional
             Initial point to use in the optimisation problem.
+        qp_method : str, optional
+            Minimization routine to use in scipy minimize to solve the
+            problem, by default "SLSQP".
+        qp_options : dict, optional
+            Options for scipy minimize, by default {"maxiter": 1000}.
 
         Returns
         -------
@@ -38,21 +48,28 @@ def w_optimize(
         :meta private:
         """
         _, n_c = X0.shape
 
         diff = np.subtract(X0, X1.reshape(-1, 1))
-        r = np.linalg.norm(diff, axis=0)
+        r = np.diag(diff.T @ V_mat @ diff)
+
+        P = X0.T @ V_mat @ X0
+        q = -1.0 * X1.T @ V_mat @ X0 + (lambda_ / 2.0) * r.T
 
         def fun(x):
-            return (X1 - X0 @ x).T @ (X1 - X0 @ x) + lambda_ * (r.T @ x)
+            return q.T @ x + 0.5 * x.T @ P @ x
 
         bounds = Bounds(lb=np.full(n_c, 0.0), ub=np.full(n_c, 1.0))
         constraints = LinearConstraint(A=np.full(n_c, 1.0), lb=1.0, ub=1.0)
 
-        if initial:
-            x0 = initial
-        else:
-            x0 = np.full(n_c, 1 / n_c)
-
-        res = minimize(fun=fun, x0=x0, bounds=bounds, constraints=constraints)
+        x0 = np.full(n_c, 1 / n_c)
+        res = minimize(
+            fun=fun,
+            x0=x0,
+            bounds=bounds,
+            constraints=constraints,
+            method=qp_method,
+            options=qp_options,
+        )
         W, loss_W = res["x"], res["fun"]
         return W, loss_W.item()
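For reference, the new `fun` is the Abadie & L'Hour penalized objective, `(X1 - X0 @ w) @ V @ (X1 - X0 @ w) + lambda_ * r @ w`, rewritten in standard quadratic-program form. A minimal standalone sketch (illustrative sizes and names, not part of this commit) checking that `q @ w + 0.5 * w @ P @ w` differs from the penalized objective only by a factor of 2 and a constant independent of `w`, so both share the same minimiser:

```python
import numpy as np

rng = np.random.default_rng(0)
c, m = 3, 5                    # c covariates, m control units (made-up sizes)
X0 = rng.random((c, m))        # one column per control unit
X1 = rng.random(c)             # covariate values for the treated unit
V = np.identity(c)             # Γ in the Abadie & L'Hour notation
lambda_ = 0.01

# Pairwise discrepancies r_j = (X1 - X0_j)' V (X1 - X0_j), as in the diff above.
diff = np.subtract(X0, X1.reshape(-1, 1))
r = np.diag(diff.T @ V @ diff)

P = X0.T @ V @ X0
q = -1.0 * X1.T @ V @ X0 + (lambda_ / 2.0) * r

w = rng.dirichlet(np.ones(m))  # an arbitrary point on the unit simplex

direct = (X1 - X0 @ w) @ V @ (X1 - X0 @ w) + lambda_ * r @ w
quadratic = q @ w + 0.5 * w @ P @ w

# The two objectives differ by the constant X1' V X1 (up to a factor of 2),
# so minimising one minimises the other.
assert np.isclose(direct, 2.0 * quadratic + X1 @ V @ X1)
```

Since `P = X0.T @ V_mat @ X0` is positive semi-definite whenever `V_mat` is, the problem handed to SLSQP is a convex QP over the unit simplex.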

@@ -64,24 +81,67 @@ class PenalizedSynth(BaseSynth, PenalizedOptimMixin):

     def __init__(self) -> None:
         super().__init__()
         self.W: Optional[np.ndarray] = None
+        self.loss_W: Optional[float] = None
         self.lambda_: Optional[float] = None
 
-    def fit(self, dataprep: Dataprep, lambda_: float) -> None:
+    def fit(
+        self,
+        dataprep: Optional[Dataprep] = None,
+        X0: Optional[pd.DataFrame] = None,
+        X1: Optional[Union[pd.Series, pd.DataFrame]] = None,
+        lambda_: Optional[float] = 0.01,
+        custom_V: Optional[np.ndarray] = None,
+    ) -> None:
         """Fit the model/calculate the weights.
 
         Parameters
         ----------
-        dataprep : Dataprep
-            :class:`Dataprep` object containing data to model.
-        lambda_ : float
-            Ridge parameter to use.
+        dataprep : Dataprep, optional
+            :class:`Dataprep` object containing data to model, by default None.
+        X0 : pandas.DataFrame, shape (c, m), optional
+            Matrix with each column corresponding to a control unit and each
+            row to a covariate value, by default None.
+        X1 : pandas.Series | pandas.DataFrame, shape (c, 1), optional
+            Column vector giving the covariate values for the treated unit, by
+            default None.
+        lambda_ : float, optional
+            Ridge parameter to use, by default 0.01.
+        custom_V : numpy.ndarray, shape (c,), optional
+            Provide the diagonal of the V matrix (using the notation of the
+            Abadie, Diamond & Hainmueller paper; this matrix is denoted by Γ
+            in the Abadie and L'Hour paper). If not provided then the identity
+            matrix is used (equal importance to all covariates).
 
         Returns
         -------
         NoneType
             None
 
+        Raises
+        ------
+        ValueError
+            If neither a Dataprep object nor both of (X0, X1) are supplied.
         """
-        self.dataprep = dataprep
+        if dataprep:
+            self.dataprep = dataprep
+            X0, X1 = dataprep.make_covariate_mats()
+        else:
+            if X0 is None or X1 is None:
+                raise ValueError("dataprep must be set or (X0, X1) must both be set.")
         self.lambda_ = lambda_
 
-        X0_df, X1_df = dataprep.make_outcome_mats()
-        X0, X1 = X0_df.to_numpy(), X1_df.to_numpy()
+        X = pd.concat([X0, X1], axis=1)
+        X_scaled = X.divide(X.std(axis=1), axis=0)
+        X0_scaled, X1_scaled = X_scaled.drop(columns=X1.name), X_scaled[X1.name]
+
+        X0_arr = X0_scaled.to_numpy()
+        X1_arr = X1_scaled.to_numpy()
+
+        if custom_V is None:
+            V_mat = np.identity(X0_arr.shape[0])
+        else:
+            V_mat = np.diag(custom_V)
 
-        W, _ = self.w_optimize(X0=X0, X1=X1, lambda_=lambda_)
-        self.W = W
+        W, loss_W = self.w_optimize(V_mat=V_mat, X0=X0_arr, X1=X1_arr, lambda_=lambda_)
+        self.W, self.loss_W = W, loss_W
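A minimal usage sketch of the new `fit` signature (the `dataprep` object is assumed; see the test fixture below for a full construction). Two behavioural notes from the diff: `fit` now matches on the covariate matrices from `make_covariate_mats` rather than the outcome matrices, and each covariate row is scaled by its standard deviation across all units before the weights are solved for:

```python
import numpy as np
from pysyncon import PenalizedSynth

pen = PenalizedSynth()

# Default: identity V, i.e. equal importance for every covariate row.
pen.fit(dataprep=dataprep, lambda_=0.01)
print(pen.W)       # weights, one per control unit
print(pen.loss_W)  # objective value at the optimum

# Custom V: supply the diagonal, one entry per covariate row
# (four here, matching the fixture in tests/test_penalized.py below).
pen.fit(dataprep=dataprep, lambda_=0.01, custom_V=np.full(4, 1.0))
```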
tests/test_penalized.py: 43 changes (43 additions, 0 deletions)
@@ -0,0 +1,43 @@
+import unittest
+import numpy as np
+import pandas as pd
+
+import pysyncon
+
+
+class TestPenalizedSynth(unittest.TestCase):
+    def setUp(self):
+        self.dataprep = pysyncon.Dataprep(
+            foo=pd.DataFrame(
+                {
+                    "time": [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4],
+                    "name": [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3],
+                    "dependent": np.random.random(12),
+                    "predictor1": np.random.random(12),
+                    "predictor2": np.random.random(12),
+                }
+            ),
+            predictors=["predictor1"],
+            predictors_op="mean",
+            dependent="dependent",
+            unit_variable="name",
+            time_variable="time",
+            treatment_identifier=1,
+            controls_identifier=[2, 3],
+            time_predictors_prior=[2, 3],
+            time_optimize_ssr=[1, 2, 3],
+            special_predictors=[
+                ("predictor1", [2], "mean"),
+                ("predictor2", [1, 2], "median"),
+                ("predictor2", [1, 2], "std"),
+            ],
+        )
+        self.custom_V = np.full(4, 1.0)
+
+    def test_fit_no_data(self):
+        pen = pysyncon.PenalizedSynth()
+        self.assertRaises(ValueError, pen.fit)
+
+    def test_fit_custom_V(self):
+        pen = pysyncon.PenalizedSynth()
+        pen.fit(dataprep=self.dataprep, custom_V=self.custom_V)
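The fixture yields four covariate rows (the mean of predictor1 over the prior period, plus the three special predictors), which is why `custom_V` is a length-4 vector. Assuming the usual repository layout, the new module can be run from the repository root with `python -m unittest discover -s tests -p "test_penalized.py"`.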
tests/test_penalized_basque.py: 10 changes (6 additions, 4 deletions)
@@ -50,27 +50,29 @@ def setUp(self):
"Navarra (Comunidad Foral De)",
"Principado De Asturias",
"Rioja (La)",
"Spain (Espana)",
],
time_optimize_ssr=range(1960, 1970),
)
self.lambda_ = 0.01
self.weights = {
"Aragon": 0.0,
"Baleares (Islas)": 0.21680004,
"Baleares (Islas)": 0.0,
"Andalucia": 0.0,
"Canarias": 0.0,
"Cantabria": 0.0,
"Cantabria": 0.241,
"Castilla Y Leon": 0.0,
"Castilla-La Mancha": 0.0,
"Cataluna": 0.636479454,
"Cataluna": 0.759,
"Comunidad Valenciana": 0.0,
"Extremadura": 0.0,
"Galicia": 0.0,
"Madrid (Comunidad De)": 0.146720506,
"Madrid (Comunidad De)": 0.0,
"Murcia (Region de)": 0.0,
"Navarra (Comunidad Foral De)": 0.0,
"Principado De Asturias": 0.0,
"Rioja (La)": 0.0,
"Spain (Espana)": 0.0,
}

def test_weights(self):
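Note that the updated expected weights lie on the unit simplex: the two nonzero entries, 0.241 (Cantabria) and 0.759 (Cataluna), sum to one, consistent with the equality constraint in `w_optimize`.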
