Source code for statsmodels.treatment.treatment_effects

"""Treatment effect estimators

follows largely Stata's teffects in Stata 13 manual

Created on Tue Jun  9 22:45:23 2015

Author: Josef Perktold
License: BSD-3

currently available

                     ATE        POM_0        POM_1
res_ipw       230.688598  3172.774059  3403.462658
res_aipw     -230.989201  3403.355253  3172.366052
res_aipw_wls -227.195618  3403.250651  3176.055033
res_ra       -239.639211  3403.242272  3163.603060
res_ipwra    -229.967078  3403.335639  3173.368561


Lots of todos, just the beginning, but most effects are available but not
standard errors, and no code structure that has a useful pattern

see https://github.com/statsmodels/statsmodels/issues/2443

Note: script requires cattaneo2 data file from Stata 14, hardcoded file path
could be loaded with webuse

"""

import numpy as np
from statsmodels.compat.pandas import Substitution
from scipy.linalg import block_diag
from statsmodels.regression.linear_model import WLS
from statsmodels.sandbox.regression.gmm import GMM
from statsmodels.stats.contrast import ContrastResults
from statsmodels.tools.docstring import indent


def _mom_ate(params, endog, tind, prob, weighted=True):
    """moment condition for average treatment effect

    This does not include a moment condition for potential outcome mean (POM).

    """
    w1 = (tind / prob)
    w0 = (1. - tind) / (1. - prob)
    if weighted:
        w0 /= w0.mean()
        w1 /= w1.mean()

    wdiff = w1 - w0

    return endog * wdiff - params


def _mom_atm(params, endog, tind, prob, weighted=True):
    """moment conditions for average treatment means (POM)

    moment conditions are POM0 and POM1
    """
    w1 = (tind / prob)
    w0 = (1. - tind) / (1. - prob)
    if weighted:
        w1 /= w1.mean()
        w0 /= w0.mean()

    return np.column_stack((endog * w0 - params[0], endog * w1 - params[1]))


def _mom_ols(params, endog, tind, prob, weighted=True):
    """
    moment condition for average treatment mean based on OLS dummy regression

    moment conditions are POM0 and POM1

    """
    w = tind / prob + (1-tind) / (1 - prob)

    treat_ind = np.column_stack((1 - tind, tind))
    mom = (w * (endog - treat_ind.dot(params)))[:, None] * treat_ind

    return mom


def _mom_ols_te(tm, endog, tind, prob, weighted=True):
    """
    moment condition for average treatment mean based on OLS dummy regression

    first moment is ATE
    second moment is POM0  (control)

    """
    w = tind / prob + (1-tind) / (1 - prob)

    treat_ind = np.column_stack((tind, np.ones(len(tind))))
    mom = (w * (endog - treat_ind.dot(tm)))[:, None] * treat_ind

    return mom


def _mom_olsex(params, model=None, exog=None, scale=None):
    exog = exog if exog is not None else model.exog
    fitted = model.predict(params, exog)
    resid = model.endog - fitted
    if scale is not None:
        resid /= scale
    mom = resid[:, None] * exog
    return mom


def ate_ipw(endog, tind, prob, weighted=True, probt=None):
    """average treatment effect based on basic inverse propensity weighting.

    """
    w1 = (tind / prob)
    w0 = (1. - tind) / (1. - prob)

    if probt is not None:
        w1 *= probt
        w0 *= probt

    if weighted:
        w0 /= w0.mean()
        w1 /= w1.mean()

    wdiff = w1 - w0

    return (endog * wdiff).mean(), (endog * w0).mean(), (endog * w1).mean()


class _TEGMMGeneric1(GMM):
    """GMM class to get cov_params for treatment effects

    This combines moment conditions for the selection/treatment model and the
    outcome model to get the standard errors for the treatment effect that
    takes the first step estimation of the treatment model into account.

    this also matches standard errors of ATE and POM in Stata

    """

    def __init__(self, endog, res_select, mom_outcome, exclude_tmoms=False,
                 **kwargs):
        super().__init__(endog, None, None)
        self.results_select = res_select
        self.mom_outcome = mom_outcome
        self.exclude_tmoms = exclude_tmoms
        self.__dict__.update(kwargs)

        # add xnames so it's not None
        # we don't have exog in init in this version
        if self.data.xnames is None:
            self.data.xnames = []

        # need information about decomposition of parameters
        if exclude_tmoms:
            self.k_select = 0
        else:
            self.k_select = len(res_select.model.data.param_names)

        if exclude_tmoms:
            # fittedvalues is still linpred
            self.prob = self.results_select.predict()
        else:
            self.prob = None

    def momcond(self, params):
        k_outcome = len(params) - self.k_select
        tm = params[:k_outcome]
        p_tm = params[k_outcome:]

        tind = self.results_select.model.endog

        if self.exclude_tmoms:
            prob = self.prob
        else:
            prob = self.results_select.model.predict(p_tm)

        moms_list = []
        mom_o = self.mom_outcome(tm, self.endog, tind, prob, weighted=True)
        moms_list.append(mom_o)

        if not self.exclude_tmoms:
            mom_t = self.results_select.model.score_obs(p_tm)
            moms_list.append(mom_t)

        moms = np.column_stack(moms_list)
        return moms


class _TEGMM(GMM):
    """GMM class to get cov_params for treatment effects

    This combines moment conditions for the selection/treatment model and the
    outcome model to get the standard errors for the treatment effect that
    takes the first step estimation of the treatment model into account.

    this also matches standard errors of ATE and POM in Stata

    """

    def __init__(self, endog, res_select, mom_outcome):
        super().__init__(endog, None, None)
        self.results_select = res_select
        self.mom_outcome = mom_outcome

        # add xnames so it's not None
        # we don't have exog in init in this version
        if self.data.xnames is None:
            self.data.xnames = []

    def momcond(self, params):
        tm = params[:2]
        p_tm = params[2:]

        tind = self.results_select.model.endog
        prob = self.results_select.model.predict(p_tm)
        momt = self.mom_outcome(tm, self.endog, tind, prob)  # weighted=True)
        moms = np.column_stack((momt,
                                self.results_select.model.score_obs(p_tm)))
        return moms


class _IPWGMM(_TEGMMGeneric1):
    """ GMM for aipw treatment effect and potential outcome

    uses unweighted outcome regression
    """

    def momcond(self, params):
        # Note: momcond in original order of observations
        ra = self.teff
        res_select = ra.results_select
        tind = ra.treatment
        endog = ra.model_pool.endog
        effect_group = self.effect_group

        tm = params[:2]
        ps = params[2:]

        prob_sel = np.asarray(res_select.model.predict(ps))
        prob_sel = np.clip(prob_sel, 0.01, 0.99)
        prob = prob_sel

        if effect_group == "all":
            probt = None
        elif effect_group in [1, "treated"]:
            probt = prob
        elif effect_group in [0, "untreated", "control"]:
            probt = 1 - prob
        elif isinstance(effect_group, np.ndarray):
            probt = probt
        else:
            raise ValueError("incorrect option for effect_group")

        w = tind / prob + (1 - tind) / (1 - prob)
        # Are we supposed to use scaled weights? doesn't cloesely match Stata
        # w1 = tind / prob
        # w2 = (1 - tind) / (1 - prob)
        # w = w1 / w1.sum() * tind.sum() + w2 / w2.sum() * (1 - tind).sum()
        if probt is not None:
            w *= probt

        treat_ind = np.column_stack((tind, np.ones(len(tind))))
        mm = (w * (endog - treat_ind.dot(tm)))[:, None] * treat_ind

        mom_select = res_select.model.score_obs(ps)
        moms = np.column_stack((mm, mom_select))
        return moms


class _AIPWGMM(_TEGMMGeneric1):
    """ GMM for aipw treatment effect and potential outcome

    uses unweighted outcome regression
    """

    def momcond(self, params):
        ra = self.teff
        treat_mask = ra.treat_mask
        res_select = ra.results_select

        ppom = params[1]
        mask = np.arange(len(params)) != 1
        params = params[mask]

        k = ra.results0.model.exog.shape[1]
        pm = params[0]  # ATE parameter
        p0 = params[1:k+1]
        p1 = params[k+1:2*k+1]
        ps = params[2*k+1:]
        mod0 = ra.results0.model
        mod1 = ra.results1.model
        # use reordered exog, endog so it matches sub models by group
        exog = ra.exog_grouped
        endog = ra.endog_grouped

        prob_sel = np.asarray(res_select.model.predict(ps))
        prob_sel = np.clip(prob_sel, 0.01, 0.99)

        prob0 = prob_sel[~treat_mask]
        prob1 = prob_sel[treat_mask]
        prob = np.concatenate((prob0, prob1))

        # outcome models by treatment unweighted
        fitted0 = mod0.predict(p0, exog)
        mom0 = _mom_olsex(p0, model=mod0)

        fitted1 = mod1.predict(p1, exog)
        mom1 = _mom_olsex(p1, model=mod1)

        mom_outcome = block_diag(mom0, mom1)

        # moments for target statistics, ATE and POM
        tind = ra.treatment
        tind = np.concatenate((tind[~treat_mask], tind[treat_mask]))
        correct0 = (endog - fitted0) / (1 - prob) * (1 - tind)
        correct1 = (endog - fitted1) / prob * tind

        tmean0 = fitted0 + correct0
        tmean1 = fitted1 + correct1
        ate = tmean1 - tmean0

        mm = ate - pm
        mpom = tmean0 - ppom
        mm = np.column_stack((mm, mpom))

        # Note: res_select has original data order,
        # mom_outcome and mm use grouped observations
        mom_select = res_select.model.score_obs(ps)
        mom_select = np.concatenate((mom_select[~treat_mask],
                                     mom_select[treat_mask]), axis=0)

        moms = np.column_stack((mm, mom_outcome, mom_select))
        return moms


class _AIPWWLSGMM(_TEGMMGeneric1):
    """ GMM for aipw-wls treatment effect and potential outcome

    uses weighted outcome regression
    """

    def momcond(self, params):
        ra = self.teff
        treat_mask = ra.treat_mask
        res_select = ra.results_select

        ppom = params[1]
        mask = np.arange(len(params)) != 1
        params = params[mask]

        k = ra.results0.model.exog.shape[1]
        pm = params[0]  # ATE parameter
        p0 = params[1:k+1]
        p1 = params[k+1:2*k+1]
        ps = params[-6:]
        mod0 = ra.results0.model
        mod1 = ra.results1.model
        # use reordered exog, endog so it matches sub models by group
        exog = ra.exog_grouped
        endog = ra.endog_grouped

        # todo: need weights in outcome models
        prob_sel = np.asarray(res_select.model.predict(ps))

        prob_sel = np.clip(prob_sel, 0.001, 0.999)

        prob0 = prob_sel[~treat_mask]
        prob1 = prob_sel[treat_mask]
        prob = np.concatenate((prob0, prob1))

        tind = 0
        ww0 = (1 - tind) / (1 - prob0) * ((1 - tind) / (1 - prob0) - 1)
        tind = 1
        ww1 = tind / prob1 * (tind / prob1 - 1)

        # outcome models by treatment using IPW weights
        fitted0 = mod0.predict(p0, exog)
        mom0 = _mom_olsex(p0, model=mod0) * ww0[:, None]

        fitted1 = mod1.predict(p1, exog)
        mom1 = _mom_olsex(p1, model=mod1) * ww1[:, None]

        mom_outcome = block_diag(mom0, mom1)

        # moments for target statistics, ATE and POM
        tind = ra.treatment
        tind = np.concatenate((tind[~treat_mask], tind[treat_mask]))

        correct0 = (endog - fitted0) / (1 - prob) * (1 - tind)
        correct1 = (endog - fitted1) / prob * tind

        tmean0 = fitted0 + correct0
        tmean1 = fitted1 + correct1
        ate = tmean1 - tmean0

        mm = ate - pm
        mpom = tmean0 - ppom
        mm = np.column_stack((mm, mpom))

        # Note: res_select has original data order,
        # mom_outcome and mm use grouped observations
        mom_select = res_select.model.score_obs(ps)
        mom_select = np.concatenate((mom_select[~treat_mask],
                                     mom_select[treat_mask]), axis=0)

        moms = np.column_stack((mm, mom_outcome, mom_select))
        return moms


class _RAGMM(_TEGMMGeneric1):
    """GMM for regression adjustment treatment effect and potential outcome

    uses unweighted outcome regression
    """

    def momcond(self, params):
        ra = self.teff

        ppom = params[1]
        mask = np.arange(len(params)) != 1
        params = params[mask]

        k = ra.results0.model.exog.shape[1]
        pm = params[0]
        p0 = params[1:k+1]
        p1 = params[-k:]
        mod0 = ra.results0.model
        mod1 = ra.results1.model
        # use reordered exog, endog so it matches sub models by group
        exog = ra.exog_grouped

        fitted0 = mod0.predict(p0, exog)
        mom0 = _mom_olsex(p0, model=mod0)

        fitted1 = mod1.predict(p1, exog)
        mom1 = _mom_olsex(p1, model=mod1)

        momout = block_diag(mom0, mom1)

        mm = fitted1 - fitted0 - pm
        mpom = fitted0 - ppom
        mm = np.column_stack((mm, mpom))
        if self.probt is not None:
            mm *= (self.probt / self.probt.mean())[:, None]

        moms = np.column_stack((mm, momout))
        return moms


class _IPWRAGMM(_TEGMMGeneric1):
    """ GMM for ipwra treatment effect and potential outcome
    """

    def momcond(self, params):
        ra = self.teff
        treat_mask = ra.treat_mask
        res_select = ra.results_select

        ppom = params[1]
        mask = np.arange(len(params)) != 1
        params = params[mask]

        k = ra.results0.model.exog.shape[1]
        pm = params[0]  # ATE parameter
        p0 = params[1:k+1]
        p1 = params[k+1:2*k+1]
        ps = params[-6:]
        mod0 = ra.results0.model
        mod1 = ra.results1.model

        # use reordered exog so it matches sub models by group
        exog = ra.exog_grouped
        tind = np.zeros(len(treat_mask))
        tind[-treat_mask.sum():] = 1

        # selection probability by group, propensity score
        prob_sel = np.asarray(res_select.model.predict(ps))
        prob_sel = np.clip(prob_sel, 0.001, 0.999)
        prob0 = prob_sel[~treat_mask]
        prob1 = prob_sel[treat_mask]

        effect_group = self.effect_group
        if effect_group == "all":
            w0 = 1 / (1 - prob0)
            w1 = 1 / prob1
            sind = 1
        elif effect_group in [1, "treated"]:
            w0 = prob0 / (1 - prob0)
            w1 = prob1 / prob1
            # for averaging effect on treated
            sind = tind / tind.mean()
        elif effect_group in [0, "untreated", "control"]:
            w0 = (1 - prob0) / (1 - prob0)
            w1 = (1 - prob1) / prob1

            sind = (1 - tind)
            sind /= sind.mean()
        else:
            raise ValueError("incorrect option for effect_group")

        # outcome models by treatment using IPW weights
        fitted0 = mod0.predict(p0, exog)
        mom0 = _mom_olsex(p0, model=mod0) * w0[:, None]

        fitted1 = mod1.predict(p1, exog)
        mom1 = _mom_olsex(p1, model=mod1) * w1[:, None]

        mom_outcome = block_diag(mom0, mom1)

        # moments for target statistics, ATE and POM
        mm = (fitted1 - fitted0 - pm) * sind
        mpom = (fitted0 - ppom) * sind
        mm = np.column_stack((mm, mpom))

        # Note: res_select has original data order,
        # mom_outcome and mm use grouped observations
        mom_select = res_select.model.score_obs(ps)
        mom_select = np.concatenate((mom_select[~treat_mask],
                                     mom_select[treat_mask]), axis=0)

        moms = np.column_stack((mm, mom_outcome, mom_select))
        return moms


[docs] class TreatmentEffectResults(ContrastResults): """ Results class for treatment effect estimation Parameters ---------- teff : instance of TreatmentEffect class results_gmm : instance of GMMResults class method : string Method and estimator of treatment effect. kwds: dict Other keywords with additional information. Notes ----- This class is a subclass of ContrastResults and inherits methods like summary, summary_frame and conf_int. Attributes correspond to a z-test given by ``GMMResults.t_test``. """ def __init__(self, teff, results_gmm, method, **kwds): super().__init__() k_params = len(results_gmm.params) constraints = np.zeros((3, k_params)) constraints[0, 0] = 1 constraints[1, 1] = 1 constraints[2, :2] = [1, 1] tt = results_gmm.t_test(constraints) self.__dict__.update(tt.__dict__) self.teff = teff self.results_gmm = results_gmm self.method = method # TODO: make those explicit? self.__dict__.update(kwds) self.c_names = ["ATE", "POM0", "POM1"]
doc_params_returns = """\ Parameters ---------- return_results : bool If True, then a results instance is returned. If False, just ATE, POM0 and POM1 are returned. effect_group : {"all", 0, 1} ``effectgroup`` determines for which population the effects are estimated. If effect_group is "all", then sample average treatment effect and potential outcomes are returned If effect_group is 1 or "treated", then effects on treated are returned. If effect_group is 0, "treated" or "control", then effects on untreated, i.e. control group, are returned. disp : bool Indicates whether the scipy optimizer should display the optimization results Returns ------- TreatmentEffectsResults instance or tuple (ATE, POM0, POM1) """ doc_params_returns2 = """\ Parameters ---------- return_results : bool If True, then a results instance is returned. If False, just ATE, POM0 and POM1 are returned. disp : bool Indicates whether the scipy optimizer should display the optimization results Returns ------- TreatmentEffectsResults instance or tuple (ATE, POM0, POM1) """
[docs] class TreatmentEffect: """ Estimate average treatment effect under conditional independence .. versionadded:: 0.14.0 This class estimates treatment effect and potential outcome using 5 different methods, ipw, ra, aipw, aipw-wls, ipw-ra. Standard errors and inference are based on the joint GMM representation of selection or treatment model, outcome model and effect functions. Parameters ---------- model : instance of a model class The model class should contain endog and exog for the outcome model. treatment : ndarray indicator array for observations with treatment (1) or without (0) results_select : results instance The results instance for the treatment or selection model. _cov_type : "HC0" Internal keyword. The keyword oes not affect GMMResults which always corresponds to HC0 standard errors. kwds : keyword arguments currently not used Notes ----- The outcome model is currently limited to a linear model based on OLS. Other outcome models, like Logit and Poisson, will become available in future. See `Treatment Effect notebook <../examples/notebooks/generated/treatment_effect.html>`__ for an overview. """ def __init__(self, model, treatment, results_select=None, _cov_type="HC0", **kwds): # Note _cov_type is only for preliminary estimators, # cov in GMM alwasy corresponds to HC0 self.__dict__.update(kwds) # currently not used self.treatment = np.asarray(treatment) self.treat_mask = treat_mask = (treatment == 1) if results_select is not None: self.results_select = results_select self.prob_select = results_select.predict() self.model_pool = model endog = model.endog exog = model.exog self.nobs = endog.shape[0] self._cov_type = _cov_type # no init keys are supported mod0 = model.__class__(endog[~treat_mask], exog[~treat_mask]) self.results0 = mod0.fit(cov_type=_cov_type) mod1 = model.__class__(endog[treat_mask], exog[treat_mask]) self.results1 = mod1.fit(cov_type=_cov_type) # self.predict_mean0 = self.model_pool.predict(self.results0.params # ).mean() # self.predict_mean1 = self.model_pool.predict(self.results1.params # ).mean() self.exog_grouped = np.concatenate((mod0.exog, mod1.exog), axis=0) self.endog_grouped = np.concatenate((mod0.endog, mod1.endog), axis=0)
[docs] @classmethod def from_data(cls, endog, exog, treatment, model='ols', **kwds): """create models from data not yet implemented """ raise NotImplementedError
[docs] def ipw(self, return_results=True, effect_group="all", disp=False): """Inverse Probability Weighted treatment effect estimation. Parameters ---------- return_results : bool If True, then a results instance is returned. If False, just ATE, POM0 and POM1 are returned. effect_group : {"all", 0, 1} ``effectgroup`` determines for which population the effects are estimated. If effect_group is "all", then sample average treatment effect and potential outcomes are returned. If effect_group is 1 or "treated", then effects on treated are returned. If effect_group is 0, "treated" or "control", then effects on untreated, i.e. control group, are returned. disp : bool Indicates whether the scipy optimizer should display the optimization results Returns ------- TreatmentEffectsResults instance or tuple (ATE, POM0, POM1) See Also -------- TreatmentEffectsResults """ endog = self.model_pool.endog tind = self.treatment prob = self.prob_select if effect_group == "all": probt = None elif effect_group in [1, "treated"]: probt = prob effect_group = 1 # standardize effect_group name elif effect_group in [0, "untreated", "control"]: probt = 1 - prob effect_group = 0 # standardize effect_group name elif isinstance(effect_group, np.ndarray): probt = effect_group effect_group = "user" # standardize effect_group name else: raise ValueError("incorrect option for effect_group") res_ipw = ate_ipw(endog, tind, prob, weighted=True, probt=probt) if not return_results: return res_ipw # gmm = _TEGMMGeneric1(endog, self.results_select, _mom_ols_te, # probt=probt) gmm = _IPWGMM(endog, self.results_select, None, teff=self, effect_group=effect_group) start_params = np.concatenate((res_ipw[:2], self.results_select.params)) res_gmm = gmm.fit(start_params=start_params, inv_weights=np.eye(len(start_params)), optim_method='nm', optim_args={"maxiter": 5000, "disp": disp}, maxiter=1, ) res = TreatmentEffectResults(self, res_gmm, "IPW", start_params=start_params, effect_group=effect_group, ) return res
[docs] @Substitution(params_returns=indent(doc_params_returns, " " * 8)) def ra(self, return_results=True, effect_group="all", disp=False): """ Regression Adjustment treatment effect estimation. \n%(params_returns)s See Also -------- TreatmentEffectsResults """ # need indicator for reordered observations tind = np.zeros(len(self.treatment)) tind[-self.treatment.sum():] = 1 if effect_group == "all": probt = None elif effect_group in [1, "treated"]: probt = tind effect_group = 1 # standardize effect_group name elif effect_group in [0, "untreated", "control"]: probt = 1 - tind effect_group = 0 # standardize effect_group name elif isinstance(effect_group, np.ndarray): # TODO: do we keep this? probt = effect_group effect_group = "user" # standardize effect_group name else: raise ValueError("incorrect option for effect_group") exog = self.exog_grouped # weight or indicator for effect_group if probt is not None: cw = (probt / probt.mean()) else: cw = 1 pom0 = (self.results0.predict(exog) * cw).mean() pom1 = (self.results1.predict(exog) * cw).mean() if not return_results: return pom1 - pom0, pom0, pom1 endog = self.model_pool.endog mod_gmm = _RAGMM(endog, self.results_select, None, teff=self, probt=probt) start_params = np.concatenate(( # ate, tt0.effect, [pom1 - pom0, pom0], self.results0.params, self.results1.params)) res_gmm = mod_gmm.fit(start_params=start_params, inv_weights=np.eye(len(start_params)), optim_method='nm', optim_args={"maxiter": 5000, "disp": disp}, maxiter=1, ) res = TreatmentEffectResults(self, res_gmm, "IPW", start_params=start_params, effect_group=effect_group, ) return res
[docs] @Substitution(params_returns=indent(doc_params_returns2, " " * 8)) def aipw(self, return_results=True, disp=False): """ ATE and POM from double robust augmented inverse probability weighting \n%(params_returns)s See Also -------- TreatmentEffectsResults """ nobs = self.nobs prob = self.prob_select tind = self.treatment exog = self.model_pool.exog # in original order correct0 = (self.results0.resid / (1 - prob[tind == 0])).sum() / nobs correct1 = (self.results1.resid / (prob[tind == 1])).sum() / nobs tmean0 = self.results0.predict(exog).mean() + correct0 tmean1 = self.results1.predict(exog).mean() + correct1 ate = tmean1 - tmean0 if not return_results: return ate, tmean0, tmean1 endog = self.model_pool.endog p2_aipw = np.asarray([ate, tmean0]) mag_aipw1 = _AIPWGMM(endog, self.results_select, None, teff=self) start_params = np.concatenate(( p2_aipw, self.results0.params, self.results1.params, self.results_select.params)) res_gmm = mag_aipw1.fit( start_params=start_params, inv_weights=np.eye(len(start_params)), optim_method='nm', optim_args={"maxiter": 5000, "disp": disp}, maxiter=1) res = TreatmentEffectResults(self, res_gmm, "IPW", start_params=start_params, effect_group="all", ) return res
[docs] @Substitution(params_returns=indent(doc_params_returns2, " " * 8)) def aipw_wls(self, return_results=True, disp=False): """ ATE and POM from double robust augmented inverse probability weighting. This uses weighted outcome regression, while `aipw` uses unweighted outcome regression. Option for effect on treated or on untreated is not available. \n%(params_returns)s See Also -------- TreatmentEffectsResults """ nobs = self.nobs prob = self.prob_select endog = self.model_pool.endog exog = self.model_pool.exog tind = self.treatment treat_mask = self.treat_mask ww1 = tind / prob * (tind / prob - 1) mod1 = WLS(endog[treat_mask], exog[treat_mask], weights=ww1[treat_mask]) result1 = mod1.fit(cov_type='HC1') mean1_ipw2 = result1.predict(exog).mean() ww0 = (1 - tind) / (1 - prob) * ((1 - tind) / (1 - prob) - 1) mod0 = WLS(endog[~treat_mask], exog[~treat_mask], weights=ww0[~treat_mask]) result0 = mod0.fit(cov_type='HC1') mean0_ipw2 = result0.predict(exog).mean() self.results_ipwwls0 = result0 self.results_ipwwls1 = result1 correct0 = (result0.resid / (1 - prob[tind == 0])).sum() / nobs correct1 = (result1.resid / (prob[tind == 1])).sum() / nobs tmean0 = mean0_ipw2 + correct0 tmean1 = mean1_ipw2 + correct1 ate = tmean1 - tmean0 if not return_results: return ate, tmean0, tmean1 p2_aipw_wls = np.asarray([ate, tmean0]).squeeze() # GMM mod_gmm = _AIPWWLSGMM(endog, self.results_select, None, teff=self) start_params = np.concatenate(( p2_aipw_wls, result0.params, result1.params, self.results_select.params)) res_gmm = mod_gmm.fit( start_params=start_params, inv_weights=np.eye(len(start_params)), optim_method='nm', optim_args={"maxiter": 5000, "disp": disp}, maxiter=1) res = TreatmentEffectResults(self, res_gmm, "IPW", start_params=start_params, effect_group="all", ) return res
[docs] @Substitution(params_returns=indent(doc_params_returns, " " * 8)) def ipw_ra(self, return_results=True, effect_group="all", disp=False): """ ATE and POM from inverse probability weighted regression adjustment. \n%(params_returns)s See Also -------- TreatmentEffectsResults """ treat_mask = self.treat_mask endog = self.model_pool.endog exog = self.model_pool.exog prob = self.prob_select prob0 = prob[~treat_mask] prob1 = prob[treat_mask] if effect_group == "all": w0 = 1 / (1 - prob0) w1 = 1 / prob1 exogt = exog elif effect_group in [1, "treated"]: w0 = prob0 / (1 - prob0) w1 = prob1 / prob1 exogt = exog[treat_mask] effect_group = 1 # standardize effect_group name elif effect_group in [0, "untreated", "control"]: w0 = (1 - prob0) / (1 - prob0) w1 = (1 - prob1) / prob1 exogt = exog[~treat_mask] effect_group = 0 # standardize effect_group name else: raise ValueError("incorrect option for effect_group") mod0 = WLS(endog[~treat_mask], exog[~treat_mask], weights=w0) result0 = mod0.fit(cov_type='HC1') # mean0_ipwra = (result0.predict(exog) * (prob / prob.mean())).mean() mean0_ipwra = result0.predict(exogt).mean() mod1 = WLS(endog[treat_mask], exog[treat_mask], weights=w1) result1 = mod1.fit(cov_type='HC1') # mean1_ipwra = (result1.predict(exog) * (prob / prob.mean())).mean() mean1_ipwra = result1.predict(exogt).mean() if not return_results: return mean1_ipwra - mean0_ipwra, mean0_ipwra, mean1_ipwra # GMM mod_gmm = _IPWRAGMM(endog, self.results_select, None, teff=self, effect_group=effect_group) start_params = np.concatenate(( [mean1_ipwra - mean0_ipwra, mean0_ipwra], result0.params, result1.params, np.asarray(self.results_select.params) )) res_gmm = mod_gmm.fit( start_params=start_params, inv_weights=np.eye(len(start_params)), optim_method='nm', optim_args={"maxiter": 2000, "disp": disp}, maxiter=1 ) res = TreatmentEffectResults(self, res_gmm, "IPW", start_params=start_params, effect_group=effect_group, ) return res

Last update: Nov 14, 2024