'''Partial Regression plot and residual plots to find misspecification
Author: Josef Perktold
License: BSD-3
Created: 2011-01-23
update
2011-06-05 : start to convert example to usable functions
2011-10-27 : docstrings
'''
from statsmodels.compat.python import lrange, string_types, lzip, range
import numpy as np
from patsy import dmatrix
from statsmodels.regression.linear_model import OLS
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.graphics import utils
from statsmodels.nonparametric.smoothers_lowess import lowess
from statsmodels.tools.tools import maybe_unwrap_results
__all__ = ['plot_fit', 'plot_regress_exog', 'plot_partregress', 'plot_ccpr',
'plot_regress_exog', 'plot_partregress_grid', 'plot_ccpr_grid',
'add_lowess', 'abline_plot', 'influence_plot',
'plot_leverage_resid2']
#TODO: consider moving to influence module
def _high_leverage(results):
#TODO: replace 1 with k_constant
return 2. * (results.df_model + 1)/results.nobs
def add_lowess(ax, lines_idx=0, frac=.2, **lowess_kwargs):
"""
Add Lowess line to a plot.
Parameters
----------
ax : matplotlib Axes instance
The Axes to which to add the plot
lines_idx : int
This is the line on the existing plot to which you want to add
a smoothed lowess line.
frac : float
The fraction of the points to use when doing the lowess fit.
lowess_kwargs
Additional keyword arguments are passes to lowess.
Returns
-------
fig : matplotlib Figure instance
The figure that holds the instance.
"""
y0 = ax.get_lines()[lines_idx]._y
x0 = ax.get_lines()[lines_idx]._x
lres = lowess(y0, x0, frac=frac, **lowess_kwargs)
ax.plot(lres[:, 0], lres[:, 1], 'r', lw=1.5)
return ax.figure
[docs]def plot_fit(results, exog_idx, y_true=None, ax=None, **kwargs):
"""Plot fit against one regressor.
This creates one graph with the scatterplot of observed values compared to
fitted values.
Parameters
----------
results : result instance
result instance with resid, model.endog and model.exog as attributes
x_var : int or str
Name or index of regressor in exog matrix.
y_true : array_like
(optional) If this is not None, then the array is added to the plot
ax : Matplotlib AxesSubplot instance, optional
If given, this subplot is used to plot in instead of a new figure being
created.
kwargs
The keyword arguments are passed to the plot command for the fitted
values points.
Returns
-------
fig : Matplotlib figure instance
If `ax` is None, the created figure. Otherwise the figure to which
`ax` is connected.
Examples
--------
Load the Statewide Crime data set and perform linear regression with
`poverty` and `hs_grad` as variables and `murder` as the response
>>> import statsmodels.api as sm
>>> import matplotlib.pyplot as plt
>>> data = sm.datasets.statecrime.load_pandas().data
>>> murder = data['murder']
>>> X = data[['poverty', 'hs_grad']]
>>> X["constant"] = 1
>>> y = murder
>>> model = sm.OLS(y, X)
>>> results = model.fit()
Create a plot just for the variable 'Poverty':
>>> fig, ax = plt.subplots()
>>> fig = sm.graphics.plot_fit(results, 0, ax=ax)
>>> ax.set_ylabel("Murder Rate")
>>> ax.set_xlabel("Poverty Level")
>>> ax.set_title("Linear Regression")
>>> plt.show()
.. plot:: plots/graphics_plot_fit_ex.py
"""
fig, ax = utils.create_mpl_ax(ax)
exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
results = maybe_unwrap_results(results)
#maybe add option for wendog, wexog
y = results.model.endog
x1 = results.model.exog[:, exog_idx]
x1_argsort = np.argsort(x1)
y = y[x1_argsort]
x1 = x1[x1_argsort]
ax.plot(x1, y, 'bo', label=results.model.endog_names)
if not y_true is None:
ax.plot(x1, y_true[x1_argsort], 'b-', label='True values')
title = 'Fitted values versus %s' % exog_name
prstd, iv_l, iv_u = wls_prediction_std(results)
ax.plot(x1, results.fittedvalues[x1_argsort], 'D', color='r',
label='fitted', **kwargs)
ax.vlines(x1, iv_l[x1_argsort], iv_u[x1_argsort], linewidth=1, color='k',
alpha=.7)
#ax.fill_between(x1, iv_l[x1_argsort], iv_u[x1_argsort], alpha=0.1,
# color='k')
ax.set_title(title)
ax.set_xlabel(exog_name)
ax.set_ylabel(results.model.endog_names)
ax.legend(loc='best', numpoints=1)
return fig
[docs]def plot_regress_exog(results, exog_idx, fig=None):
"""Plot regression results against one regressor.
This plots four graphs in a 2 by 2 figure: 'endog versus exog',
'residuals versus exog', 'fitted versus exog' and
'fitted plus residual versus exog'
Parameters
----------
results : result instance
result instance with resid, model.endog and model.exog as attributes
exog_idx : int
index of regressor in exog matrix
fig : Matplotlib figure instance, optional
If given, this figure is simply returned. Otherwise a new figure is
created.
Returns
-------
fig : matplotlib figure instance
"""
fig = utils.create_mpl_fig(fig)
exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
results = maybe_unwrap_results(results)
#maybe add option for wendog, wexog
y_name = results.model.endog_names
x1 = results.model.exog[:, exog_idx]
prstd, iv_l, iv_u = wls_prediction_std(results)
ax = fig.add_subplot(2, 2, 1)
ax.plot(x1, results.model.endog, 'o', color='b', alpha=0.9, label=y_name)
ax.plot(x1, results.fittedvalues, 'D', color='r', label='fitted',
alpha=.5)
ax.vlines(x1, iv_l, iv_u, linewidth=1, color='k', alpha=.7)
ax.set_title('Y and Fitted vs. X', fontsize='large')
ax.set_xlabel(exog_name)
ax.set_ylabel(y_name)
ax.legend(loc='best')
ax = fig.add_subplot(2, 2, 2)
ax.plot(x1, results.resid, 'o')
ax.axhline(y=0, color='black')
ax.set_title('Residuals versus %s' % exog_name, fontsize='large')
ax.set_xlabel(exog_name)
ax.set_ylabel("resid")
ax = fig.add_subplot(2, 2, 3)
exog_noti = np.ones(results.model.exog.shape[1], bool)
exog_noti[exog_idx] = False
exog_others = results.model.exog[:, exog_noti]
from pandas import Series
fig = plot_partregress(results.model.data.orig_endog,
Series(x1, name=exog_name,
index=results.model.data.row_labels),
exog_others, obs_labels=False, ax=ax)
ax.set_title('Partial regression plot', fontsize='large')
#ax.set_ylabel("Fitted values")
#ax.set_xlabel(exog_name)
ax = fig.add_subplot(2, 2, 4)
fig = plot_ccpr(results, exog_idx, ax=ax)
ax.set_title('CCPR Plot', fontsize='large')
#ax.set_xlabel(exog_name)
#ax.set_ylabel("Fitted values + resids")
fig.suptitle('Regression Plots for %s' % exog_name, fontsize="large")
fig.tight_layout()
fig.subplots_adjust(top=.90)
return fig
def _partial_regression(endog, exog_i, exog_others):
"""Partial regression.
regress endog on exog_i conditional on exog_others
uses OLS
Parameters
----------
endog : array_like
exog : array_like
exog_others : array_like
Returns
-------
res1c : OLS results instance
(res1a, res1b) : tuple of OLS results instances
results from regression of endog on exog_others and of exog_i on
exog_others
"""
#FIXME: This function doesn't appear to be used.
res1a = OLS(endog, exog_others).fit()
res1b = OLS(exog_i, exog_others).fit()
res1c = OLS(res1a.resid, res1b.resid).fit()
return res1c, (res1a, res1b)
[docs]def plot_partregress(endog, exog_i, exog_others, data=None,
title_kwargs={}, obs_labels=True, label_kwargs={},
ax=None, ret_coords=False, **kwargs):
"""Plot partial regression for a single regressor.
Parameters
----------
endog : ndarray or string
endogenous or response variable. If string is given, you can use a
arbitrary translations as with a formula.
exog_i : ndarray or string
exogenous, explanatory variable. If string is given, you can use a
arbitrary translations as with a formula.
exog_others : ndarray or list of strings
other exogenous, explanatory variables. If a list of strings is given,
each item is a term in formula. You can use a arbitrary translations
as with a formula. The effect of these variables will be removed by
OLS regression.
data : DataFrame, dict, or recarray
Some kind of data structure with names if the other variables are
given as strings.
title_kwargs : dict
Keyword arguments to pass on for the title. The key to control the
fonts is fontdict.
obs_labels : bool or array-like
Whether or not to annotate the plot points with their observation
labels. If obs_labels is a boolean, the point labels will try to do
the right thing. First it will try to use the index of data, then
fall back to the index of exog_i. Alternatively, you may give an
array-like object corresponding to the obseveration numbers.
labels_kwargs : dict
Keyword arguments that control annotate for the observation labels.
ax : Matplotlib AxesSubplot instance, optional
If given, this subplot is used to plot in instead of a new figure being
created.
ret_coords : bool
If True will return the coordinates of the points in the plot. You
can use this to add your own annotations.
kwargs
The keyword arguments passed to plot for the points.
Returns
-------
fig : Matplotlib figure instance
If `ax` is None, the created figure. Otherwise the figure to which
`ax` is connected.
coords : list, optional
If ret_coords is True, return a tuple of arrays (x_coords, y_coords).
Notes
-----
The slope of the fitted line is the that of `exog_i` in the full
multiple regression. The individual points can be used to assess the
influence of points on the estimated coefficient.
See Also
--------
plot_partregress_grid : Plot partial regression for a set of regressors.
"""
#NOTE: there is no interaction between possible missing data and
#obs_labels yet, so this will need to be tweaked a bit for this case
fig, ax = utils.create_mpl_ax(ax)
# strings, use patsy to transform to data
if isinstance(endog, string_types):
endog = dmatrix(endog + "-1", data)
if isinstance(exog_others, string_types):
RHS = dmatrix(exog_others, data)
elif isinstance(exog_others, list):
RHS = "+".join(exog_others)
RHS = dmatrix(RHS, data)
else:
RHS = exog_others
if isinstance(exog_i, string_types):
exog_i = dmatrix(exog_i + "-1", data)
# all arrays or pandas-like
res_yaxis = OLS(endog, RHS).fit()
res_xaxis = OLS(exog_i, RHS).fit()
ax.plot(res_xaxis.resid, res_yaxis.resid, 'o', **kwargs)
fitted_line = OLS(res_yaxis.resid, res_xaxis.resid).fit()
fig = abline_plot(0, fitted_line.params[0], color='k', ax=ax)
x_axis_endog_name = res_xaxis.model.endog_names
if x_axis_endog_name == 'y': # for no names regression will just get a y
x_axis_endog_name = 'x' # this is misleading, so use x
ax.set_xlabel("e(%s | X)" % x_axis_endog_name)
ax.set_ylabel("e(%s | X)" % res_yaxis.model.endog_names)
ax.set_title('Partial Regression Plot', **title_kwargs)
#NOTE: if we want to get super fancy, we could annotate if a point is
#clicked using this widget
#http://stackoverflow.com/questions/4652439/
#is-there-a-matplotlib-equivalent-of-matlabs-datacursormode/
#4674445#4674445
if obs_labels is True:
if data is not None:
obs_labels = data.index
elif hasattr(exog_i, "index"):
obs_labels = exog_i.index
else:
obs_labels = res_xaxis.model.data.row_labels
#NOTE: row_labels can be None.
#Maybe we should fix this to never be the case.
if obs_labels is None:
obs_labels = lrange(len(exog_i))
if obs_labels is not False: # could be array-like
if len(obs_labels) != len(exog_i):
raise ValueError("obs_labels does not match length of exog_i")
label_kwargs.update(dict(ha="center", va="bottom"))
ax = utils.annotate_axes(lrange(len(obs_labels)), obs_labels,
lzip(res_xaxis.resid, res_yaxis.resid),
[(0, 5)] * len(obs_labels), "x-large", ax=ax,
**label_kwargs)
if ret_coords:
return fig, (res_xaxis.resid, res_yaxis.resid)
else:
return fig
def plot_partregress_grid(results, exog_idx=None, grid=None, fig=None):
"""Plot partial regression for a set of regressors.
Parameters
----------
results : results instance
A regression model results instance
exog_idx : None, list of ints, list of strings
(column) indices of the exog used in the plot, default is all.
grid : None or tuple of int (nrows, ncols)
If grid is given, then it is used for the arrangement of the subplots.
If grid is None, then ncol is one, if there are only 2 subplots, and
the number of columns is two otherwise.
fig : Matplotlib figure instance, optional
If given, this figure is simply returned. Otherwise a new figure is
created.
Returns
-------
fig : Matplotlib figure instance
If `fig` is None, the created figure. Otherwise `fig` itself.
Notes
-----
A subplot is created for each explanatory variable given by exog_idx.
The partial regression plot shows the relationship between the response
and the given explanatory variable after removing the effect of all other
explanatory variables in exog.
See Also
--------
plot_partregress : Plot partial regression for a single regressor.
plot_ccpr
References
----------
See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm
"""
import pandas
fig = utils.create_mpl_fig(fig)
exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
#maybe add option for using wendog, wexog instead
y = pandas.Series(results.model.endog, name=results.model.endog_names)
exog = results.model.exog
k_vars = exog.shape[1]
#this function doesn't make sense if k_vars=1
if not grid is None:
nrows, ncols = grid
else:
if len(exog_idx) > 2:
nrows = int(np.ceil(len(exog_idx)/2.))
ncols = 2
title_kwargs = {"fontdict" : {"fontsize" : 'small'}}
else:
nrows = len(exog_idx)
ncols = 1
title_kwargs = {}
# for indexing purposes
other_names = np.array(results.model.exog_names)
for i, idx in enumerate(exog_idx):
others = lrange(k_vars)
others.pop(idx)
exog_others = pandas.DataFrame(exog[:, others],
columns=other_names[others])
ax = fig.add_subplot(nrows, ncols, i+1)
plot_partregress(y, pandas.Series(exog[:, idx],
name=other_names[idx]),
exog_others, ax=ax, title_kwargs=title_kwargs,
obs_labels=False)
ax.set_title("")
fig.suptitle("Partial Regression Plot", fontsize="large")
fig.tight_layout()
fig.subplots_adjust(top=.95)
return fig
[docs]def plot_ccpr(results, exog_idx, ax=None):
"""Plot CCPR against one regressor.
Generates a CCPR (component and component-plus-residual) plot.
Parameters
----------
results : result instance
A regression results instance.
exog_idx : int or string
Exogenous, explanatory variable. If string is given, it should
be the variable name that you want to use, and you can use arbitrary
translations as with a formula.
ax : Matplotlib AxesSubplot instance, optional
If given, it is used to plot in instead of a new figure being
created.
Returns
-------
fig : Matplotlib figure instance
If `ax` is None, the created figure. Otherwise the figure to which
`ax` is connected.
See Also
--------
plot_ccpr_grid : Creates CCPR plot for multiple regressors in a plot grid.
Notes
-----
The CCPR plot provides a way to judge the effect of one regressor on the
response variable by taking into account the effects of the other
independent variables. The partial residuals plot is defined as
Residuals + B_i*X_i versus X_i. The component adds the B_i*X_i versus
X_i to show where the fitted line would lie. Care should be taken if X_i
is highly correlated with any of the other independent variables. If this
is the case, the variance evident in the plot will be an underestimate of
the true variance.
References
----------
http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
"""
fig, ax = utils.create_mpl_ax(ax)
exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
results = maybe_unwrap_results(results)
x1 = results.model.exog[:, exog_idx]
#namestr = ' for %s' % self.name if self.name else ''
x1beta = x1*results.params[exog_idx]
ax.plot(x1, x1beta + results.resid, 'o')
from statsmodels.tools.tools import add_constant
mod = OLS(x1beta, add_constant(x1)).fit()
params = mod.params
fig = abline_plot(*params, **dict(ax=ax))
#ax.plot(x1, x1beta, '-')
ax.set_title('Component and component plus residual plot')
ax.set_ylabel("Residual + %s*beta_%d" % (exog_name, exog_idx))
ax.set_xlabel("%s" % exog_name)
return fig
def plot_ccpr_grid(results, exog_idx=None, grid=None, fig=None):
"""Generate CCPR plots against a set of regressors, plot in a grid.
Generates a grid of CCPR (component and component-plus-residual) plots.
Parameters
----------
results : result instance
uses exog and params of the result instance
exog_idx : None or list of int
(column) indices of the exog used in the plot
grid : None or tuple of int (nrows, ncols)
If grid is given, then it is used for the arrangement of the subplots.
If grid is None, then ncol is one, if there are only 2 subplots, and
the number of columns is two otherwise.
fig : Matplotlib figure instance, optional
If given, this figure is simply returned. Otherwise a new figure is
created.
Returns
-------
fig : Matplotlib figure instance
If `ax` is None, the created figure. Otherwise the figure to which
`ax` is connected.
Notes
-----
Partial residual plots are formed as::
Res + Betahat(i)*Xi versus Xi
and CCPR adds::
Betahat(i)*Xi versus Xi
See Also
--------
plot_ccpr : Creates CCPR plot for a single regressor.
References
----------
See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm
"""
fig = utils.create_mpl_fig(fig)
exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
if grid is not None:
nrows, ncols = grid
else:
if len(exog_idx) > 2:
nrows = int(np.ceil(len(exog_idx)/2.))
ncols = 2
else:
nrows = len(exog_idx)
ncols = 1
seen_constant = 0
for i, idx in enumerate(exog_idx):
if results.model.exog[:, idx].var() == 0:
seen_constant = 1
continue
ax = fig.add_subplot(nrows, ncols, i+1-seen_constant)
fig = plot_ccpr(results, exog_idx=idx, ax=ax)
ax.set_title("")
fig.suptitle("Component-Component Plus Residual Plot", fontsize="large")
fig.tight_layout()
fig.subplots_adjust(top=.95)
return fig
[docs]def abline_plot(intercept=None, slope=None, horiz=None, vert=None,
model_results=None, ax=None, **kwargs):
"""
Plots a line given an intercept and slope.
intercept : float
The intercept of the line
slope : float
The slope of the line
horiz : float or array-like
Data for horizontal lines on the y-axis
vert : array-like
Data for verterical lines on the x-axis
model_results : statsmodels results instance
Any object that has a two-value `params` attribute. Assumed that it
is (intercept, slope)
ax : axes, optional
Matplotlib axes instance
kwargs
Options passed to matplotlib.pyplot.plt
Returns
-------
fig : Figure
The figure given by `ax.figure` or a new instance.
Examples
--------
>>> import numpy as np
>>> import statsmodels.api as sm
>>> np.random.seed(12345)
>>> X = sm.add_constant(np.random.normal(0, 20, size=30))
>>> y = np.dot(X, [25, 3.5]) + np.random.normal(0, 30, size=30)
>>> mod = sm.OLS(y,X).fit()
>>> fig = abline_plot(model_results=mod)
>>> ax = fig.axes[0]
>>> ax.scatter(X[:,1], y)
>>> ax.margins(.1)
>>> import matplotlib.pyplot as plt
>>> plt.show()
"""
if ax is not None: # get axis limits first thing, don't change these
x = ax.get_xlim()
else:
x = None
fig, ax = utils.create_mpl_ax(ax)
if model_results:
intercept, slope = model_results.params
if x is None:
x = [model_results.model.exog[:, 1].min(),
model_results.model.exog[:, 1].max()]
else:
if not (intercept is not None and slope is not None):
raise ValueError("specify slope and intercepty or model_results")
if x is None:
x = ax.get_xlim()
data_y = [x[0]*slope+intercept, x[1]*slope+intercept]
ax.set_xlim(x)
#ax.set_ylim(y)
from matplotlib.lines import Line2D
class ABLine2D(Line2D):
def update_datalim(self, ax):
ax.set_autoscale_on(False)
children = ax.get_children()
abline = [children[i] for i in range(len(children))
if isinstance(children[i], ABLine2D)][0]
x = ax.get_xlim()
y = [x[0]*slope+intercept, x[1]*slope+intercept]
abline.set_data(x, y)
ax.figure.canvas.draw()
#TODO: how to intercept something like a margins call and adjust?
line = ABLine2D(x, data_y, **kwargs)
ax.add_line(line)
ax.callbacks.connect('xlim_changed', line.update_datalim)
ax.callbacks.connect('ylim_changed', line.update_datalim)
if horiz:
ax.hline(horiz)
if vert:
ax.vline(vert)
return fig
[docs]def influence_plot(results, external=True, alpha=.05, criterion="cooks",
size=48, plot_alpha=.75, ax=None, **kwargs):
"""
Plot of influence in regression. Plots studentized resids vs. leverage.
Parameters
----------
results : results instance
A fitted model.
external : bool
Whether to use externally or internally studentized residuals. It is
recommended to leave external as True.
alpha : float
The alpha value to identify large studentized residuals. Large means
abs(resid_studentized) > t.ppf(1-alpha/2, dof=results.df_resid)
criterion : str {'DFFITS', 'Cooks'}
Which criterion to base the size of the points on. Options are
DFFITS or Cook's D.
size : float
The range of `criterion` is mapped to 10**2 - size**2 in points.
plot_alpha : float
The `alpha` of the plotted points.
ax : matplotlib Axes instance
An instance of a matplotlib Axes.
Returns
-------
fig : matplotlib figure
The matplotlib figure that contains the Axes.
Notes
-----
Row labels for the observations in which the leverage, measured by the
diagonal of the hat matrix, is high or the residuals are large, as the
combination of large residuals and a high influence value indicates an
influence point. The value of large residuals can be controlled using the
`alpha` parameter. Large leverage points are identified as
hat_i > 2 * (df_model + 1)/nobs.
"""
fig, ax = utils.create_mpl_ax(ax)
infl = results.get_influence()
if criterion.lower().startswith('dff'):
psize = infl.cooks_distance[0]
elif criterion.lower().startswith('coo'):
psize = np.abs(infl.dffits[0])
else:
raise ValueError("Criterion %s not understood" % criterion)
# scale the variables
#TODO: what is the correct scaling and the assumption here?
#we want plots to be comparable across different plots
#so we would need to use the expected distribution of criterion probably
old_range = np.ptp(psize)
new_range = size**2 - 8**2
psize = (psize - psize.min()) * new_range/old_range + 8**2
leverage = infl.hat_matrix_diag
if external:
resids = infl.resid_studentized_external
else:
resids = infl.resid_studentized_internal
from scipy import stats
cutoff = stats.t.ppf(1.-alpha/2, results.df_resid)
large_resid = np.abs(resids) > cutoff
large_leverage = leverage > _high_leverage(results)
large_points = np.logical_or(large_resid, large_leverage)
ax.scatter(leverage, resids, s=psize, alpha=plot_alpha)
# add point labels
labels = results.model.data.row_labels
if labels is None:
labels = lrange(len(resids))
ax = utils.annotate_axes(np.where(large_points)[0], labels,
lzip(leverage, resids),
lzip(-(psize/2)**.5, (psize/2)**.5), "x-large",
ax)
#TODO: make configurable or let people do it ex-post?
font = {"fontsize" : 16, "color" : "black"}
ax.set_ylabel("Studentized Residuals", **font)
ax.set_xlabel("H Leverage", **font)
ax.set_title("Influence Plot", **font)
return fig
[docs]def plot_leverage_resid2(results, alpha=.05, label_kwargs={}, ax=None,
**kwargs):
"""
Plots leverage statistics vs. normalized residuals squared
Parameters
----------
results : results instance
A regression results instance
alpha : float
Specifies the cut-off for large-standardized residuals. Residuals
are assumed to be distributed N(0, 1) with alpha=alpha.
label_kwargs : dict
The keywords to pass to annotate for the labels.
ax : Axes instance
Matplotlib Axes instance
Returns
-------
fig : matplotlib Figure
A matplotlib figure instance.
"""
from scipy.stats import zscore, norm
fig, ax = utils.create_mpl_ax(ax)
infl = results.get_influence()
leverage = infl.hat_matrix_diag
resid = zscore(results.resid)
ax.plot(resid**2, leverage, 'o', **kwargs)
ax.set_xlabel("Normalized residuals**2")
ax.set_ylabel("Leverage")
ax.set_title("Leverage vs. Normalized residuals squared")
large_leverage = leverage > _high_leverage(results)
#norm or t here if standardized?
cutoff = norm.ppf(1.-alpha/2)
large_resid = np.abs(resid) > cutoff
labels = results.model.data.row_labels
if labels is None:
labels = lrange(results.nobs)
index = np.where(np.logical_or(large_leverage, large_resid))[0]
ax = utils.annotate_axes(index, labels, lzip(resid**2, leverage),
[(0, 5)]*int(results.nobs), "large",
ax=ax, ha="center", va="bottom")
ax.margins(.075, .075)
return fig