# TODO: Determine which tests are valid for GLSAR, and under what conditions
# TODO: Fix issue with constant and GLS
# TODO: GLS: add options Iterative GLS, for iterative fgls if sigma is None
# TODO: GLS: default if sigma is none should be two-step GLS
# TODO: Check nesting when performing model based tests, lr, wald, lm
"""
This module implements standard regression models:
Generalized Least Squares (GLS)
Ordinary Least Squares (OLS)
Weighted Least Squares (WLS)
Generalized Least Squares with autoregressive error terms GLSAR(p)
Models are specified with an endogenous response variable and an
exogenous design matrix and are fit using their `fit` method.
Subclasses that have more complicated covariance matrices
should write over the 'whiten' method as the fit method
prewhitens the response by calling 'whiten'.
General reference for regression models:
D. C. Montgomery and E.A. Peck. "Introduction to Linear Regression
Analysis." 2nd. Ed., Wiley, 1992.
Econometrics references for regression models:
R. Davidson and J.G. MacKinnon. "Econometric Theory and Methods," Oxford,
2004.
W. Green. "Econometric Analysis," 5th ed., Pearson, 2003.
"""
from __future__ import print_function
from statsmodels.compat.python import lrange, lzip, range
import numpy as np
from scipy.linalg import toeplitz
from scipy import stats
from scipy import optimize
from statsmodels.tools.tools import chain_dot, pinv_extended
from statsmodels.tools.decorators import (cache_readonly,
cache_writable)
import statsmodels.base.model as base
import statsmodels.base.wrapper as wrap
from statsmodels.emplike.elregress import _ELRegOpts
import warnings
from statsmodels.tools.sm_exceptions import InvalidTestWarning
# need import in module instead of lazily to copy `__doc__`
from statsmodels.regression._prediction import PredictionResults
from . import _prediction as pred
__docformat__ = 'restructuredtext en'
__all__ = ['GLS', 'WLS', 'OLS', 'GLSAR', 'PredictionResults']
_fit_regularized_doc =\
r"""
Return a regularized fit to a linear regression model.
Parameters
----------
method : string
'elastic_net' and 'sqrt_lasso' are currently implemented.
alpha : scalar or array-like
The penalty weight. If a scalar, the same penalty weight
applies to all variables in the model. If a vector, it
must have the same length as `params`, and contains a
penalty weight for each coefficient.
L1_wt: scalar
The fraction of the penalty given to the L1 penalty term.
Must be between 0 and 1 (inclusive). If 0, the fit is a
ridge fit, if 1 it is a lasso fit.
start_params : array-like
Starting values for ``params``.
profile_scale : bool
If True the penalized fit is computed using the profile
(concentrated) log-likelihood for the Gaussian model.
Otherwise the fit uses the residual sum of squares.
refit : bool
If True, the model is refit using only the variables that
have non-zero coefficients in the regularized fit. The
refitted model is not regularized.
distributed : bool
If True, the model uses distributed methods for fitting,
will raise an error if True and partitions is None.
generator : function
generator used to partition the model, allows for handling
of out of memory/parallel computing.
partitions : scalar
The number of partitions desired for the distributed
estimation.
threshold : scalar or array-like
The threshold below which coefficients are zeroed out,
only used for distributed estimation
Returns
-------
A RegularizedResults instance.
Notes
-----
The elastic net uses a combination of L1 and L2 penalties.
The implementation closely follows the glmnet package in R.
The function that is minimized is:
.. math::
0.5*RSS/n + alpha*((1-L1\_wt)*|params|_2^2/2 + L1\_wt*|params|_1)
where RSS is the usual regression sum of squares, n is the
sample size, and :math:`|*|_1` and :math:`|*|_2` are the L1 and L2
norms.
For WLS and GLS, the RSS is calculated using the whitened endog and
exog data.
Post-estimation results are based on the same data used to
select variables, hence may be subject to overfitting biases.
The elastic_net method uses the following keyword arguments:
maxiter : int
Maximum number of iterations
cnvrg_tol : float
Convergence threshold for line searches
zero_tol : float
Coefficients below this threshold are treated as zero.
The square root lasso approach is a variation of the Lasso
that is largely self-tuning (the optimal tuning parameter
does not depend on the standard deviation of the regression
errors). If the errors are Gaussian, the tuning parameter
can be taken to be
alpha = 1.1 * np.sqrt(n) * norm.ppf(1 - 0.05 / (2 * p))
where n is the sample size and p is the number of predictors.
The square root lasso uses the following keyword arguments:
zero_tol : float
Coefficients below this threshold are treated as zero.
References
----------
Friedman, Hastie, Tibshirani (2008). Regularization paths for
generalized linear models via coordinate descent. Journal of
Statistical Software 33(1), 1-22 Feb 2010.
A Belloni, V Chernozhukov, L Wang (2011). Square-root Lasso:
pivotal recovery of sparse signals via conic programming.
Biometrika 98(4), 791-806.
https://arxiv.org/pdf/1009.5689.pdf
"""
def _get_sigma(sigma, nobs):
"""
Returns sigma (matrix, nobs by nobs) for GLS and the inverse of its
Cholesky decomposition. Handles dimensions and checks integrity.
If sigma is None, returns None, None. Otherwise returns sigma,
cholsigmainv.
"""
if sigma is None:
return None, None
sigma = np.asarray(sigma).squeeze()
if sigma.ndim == 0:
sigma = np.repeat(sigma, nobs)
if sigma.ndim == 1:
if sigma.shape != (nobs,):
raise ValueError("Sigma must be a scalar, 1d of length %s or a 2d "
"array of shape %s x %s" % (nobs, nobs, nobs))
cholsigmainv = 1/np.sqrt(sigma)
else:
if sigma.shape != (nobs, nobs):
raise ValueError("Sigma must be a scalar, 1d of length %s or a 2d "
"array of shape %s x %s" % (nobs, nobs, nobs))
cholsigmainv = np.linalg.cholesky(np.linalg.inv(sigma)).T
return sigma, cholsigmainv
class RegressionModel(base.LikelihoodModel):
"""
Base class for linear regression models. Should not be directly called.
Intended for subclassing.
"""
def __init__(self, endog, exog, **kwargs):
super(RegressionModel, self).__init__(endog, exog, **kwargs)
self._data_attr.extend(['pinv_wexog', 'wendog', 'wexog', 'weights'])
def initialize(self):
self.wexog = self.whiten(self.exog)
self.wendog = self.whiten(self.endog)
# overwrite nobs from class Model:
self.nobs = float(self.wexog.shape[0])
self._df_model = None
self._df_resid = None
self.rank = None
@property
def df_model(self):
"""
The model degree of freedom, defined as the rank of the regressor
matrix minus 1 if a constant is included.
"""
if self._df_model is None:
if self.rank is None:
self.rank = np.linalg.matrix_rank(self.exog)
self._df_model = float(self.rank - self.k_constant)
return self._df_model
@df_model.setter
def df_model(self, value):
self._df_model = value
@property
def df_resid(self):
"""
The residual degree of freedom, defined as the number of observations
minus the rank of the regressor matrix.
"""
if self._df_resid is None:
if self.rank is None:
self.rank = np.linalg.matrix_rank(self.exog)
self._df_resid = self.nobs - self.rank
return self._df_resid
@df_resid.setter
def df_resid(self, value):
self._df_resid = value
def whiten(self, X):
raise NotImplementedError("Subclasses should implement.")
def fit(self, method="pinv", cov_type='nonrobust', cov_kwds=None,
use_t=None, **kwargs):
"""
Full fit of the model.
The results include an estimate of covariance matrix, (whitened)
residuals and an estimate of scale.
Parameters
----------
method : str, optional
Can be "pinv", "qr". "pinv" uses the Moore-Penrose pseudoinverse
to solve the least squares problem. "qr" uses the QR
factorization.
cov_type : str, optional
See `regression.linear_model.RegressionResults` for a description
of the available covariance estimators
cov_kwds : list or None, optional
See `linear_model.RegressionResults.get_robustcov_results` for a
description required keywords for alternative covariance estimators
use_t : bool, optional
Flag indicating to use the Student's t distribution when computing
p-values. Default behavior depends on cov_type. See
`linear_model.RegressionResults.get_robustcov_results` for
implementation details.
Returns
-------
A RegressionResults class instance.
See Also
--------
regression.linear_model.RegressionResults
regression.linear_model.RegressionResults.get_robustcov_results
Notes
-----
The fit method uses the pseudoinverse of the design/exogenous variables
to solve the least squares minimization.
"""
if method == "pinv":
if not (hasattr(self, 'pinv_wexog') and
hasattr(self, 'normalized_cov_params') and
hasattr(self, 'rank')):
self.pinv_wexog, singular_values = pinv_extended(self.wexog)
self.normalized_cov_params = np.dot(
self.pinv_wexog, np.transpose(self.pinv_wexog))
# Cache these singular values for use later.
self.wexog_singular_values = singular_values
self.rank = np.linalg.matrix_rank(np.diag(singular_values))
beta = np.dot(self.pinv_wexog, self.wendog)
elif method == "qr":
if not (hasattr(self, 'exog_Q') and
hasattr(self, 'exog_R') and
hasattr(self, 'normalized_cov_params') and
hasattr(self, 'rank')):
Q, R = np.linalg.qr(self.wexog)
self.exog_Q, self.exog_R = Q, R
self.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
# Cache singular values from R.
self.wexog_singular_values = np.linalg.svd(R, 0, 0)
self.rank = np.linalg.matrix_rank(R)
else:
Q, R = self.exog_Q, self.exog_R
# used in ANOVA
self.effects = effects = np.dot(Q.T, self.wendog)
beta = np.linalg.solve(R, effects)
else:
raise ValueError('method has to be "pinv" or "qr"')
if self._df_model is None:
self._df_model = float(self.rank - self.k_constant)
if self._df_resid is None:
self.df_resid = self.nobs - self.rank
if isinstance(self, OLS):
lfit = OLSResults(
self, beta,
normalized_cov_params=self.normalized_cov_params,
cov_type=cov_type, cov_kwds=cov_kwds, use_t=use_t)
else:
lfit = RegressionResults(
self, beta,
normalized_cov_params=self.normalized_cov_params,
cov_type=cov_type, cov_kwds=cov_kwds, use_t=use_t,
**kwargs)
return RegressionResultsWrapper(lfit)
def predict(self, params, exog=None):
"""
Return linear predicted values from a design matrix.
Parameters
----------
params : array-like
Parameters of a linear model
exog : array-like, optional.
Design / exogenous data. Model exog is used if None.
Returns
-------
An array of fitted values
Notes
-----
If the model has not yet been fit, params is not optional.
"""
# JP: this doesn't look correct for GLMAR
# SS: it needs its own predict method
if exog is None:
exog = self.exog
return np.dot(exog, params)
def get_distribution(self, params, scale, exog=None, dist_class=None):
"""
Returns a random number generator for the predictive distribution.
Parameters
----------
params : array-like
The model parameters (regression coefficients).
scale : scalar
The variance parameter.
exog : array-like
The predictor variable matrix.
dist_class : class
A random number generator class. Must take 'loc' and 'scale'
as arguments and return a random number generator implementing
an ``rvs`` method for simulating random values. Defaults to Gaussian.
Returns
-------
gen
Frozen random number generator object with mean and variance
determined by the fitted linear model. Use the ``rvs`` method
to generate random values.
Notes
-----
Due to the behavior of ``scipy.stats.distributions objects``,
the returned random number generator must be called with
``gen.rvs(n)`` where ``n`` is the number of observations in
the data set used to fit the model. If any other value is
used for ``n``, misleading results will be produced.
"""
fit = self.predict(params, exog)
if dist_class is None:
from scipy.stats.distributions import norm
dist_class = norm
gen = dist_class(loc=fit, scale=np.sqrt(scale))
return gen
[docs]class GLS(RegressionModel):
__doc__ = r"""
Generalized least squares model with a general covariance structure.
%(params)s
sigma : scalar or array
`sigma` is the weighting matrix of the covariance.
The default is None for no scaling. If `sigma` is a scalar, it is
assumed that `sigma` is an n x n diagonal matrix with the given
scalar, `sigma` as the value of each diagonal element. If `sigma`
is an n-length vector, then `sigma` is assumed to be a diagonal
matrix with the given `sigma` on the diagonal. This should be the
same as WLS.
%(extra_params)s
Attributes
----------
pinv_wexog : array
`pinv_wexog` is the p x n Moore-Penrose pseudoinverse of `wexog`.
cholsimgainv : array
The transpose of the Cholesky decomposition of the pseudoinverse.
df_model : float
p - 1, where p is the number of regressors including the intercept.
of freedom.
df_resid : float
Number of observations n less the number of parameters p.
llf : float
The value of the likelihood function of the fitted model.
nobs : float
The number of observations n.
normalized_cov_params : array
p x p array :math:`(X^{T}\Sigma^{-1}X)^{-1}`
results : RegressionResults instance
A property that returns the RegressionResults class if fit.
sigma : array
`sigma` is the n x n covariance structure of the error terms.
wexog : array
Design matrix whitened by `cholsigmainv`
wendog : array
Response variable whitened by `cholsigmainv`
Notes
-----
If sigma is a function of the data making one of the regressors
a constant, then the current postestimation statistics will not be correct.
Examples
--------
>>> import numpy as np
>>> import statsmodels.api as sm
>>> data = sm.datasets.longley.load(as_pandas=False)
>>> data.exog = sm.add_constant(data.exog)
>>> ols_resid = sm.OLS(data.endog, data.exog).fit().resid
>>> res_fit = sm.OLS(ols_resid[1:], ols_resid[:-1]).fit()
>>> rho = res_fit.params
`rho` is a consistent estimator of the correlation of the residuals from
an OLS fit of the longley data. It is assumed that this is the true rho
of the AR process data.
>>> from scipy.linalg import toeplitz
>>> order = toeplitz(np.arange(16))
>>> sigma = rho**order
`sigma` is an n x n matrix of the autocorrelation structure of the
data.
>>> gls_model = sm.GLS(data.endog, data.exog, sigma=sigma)
>>> gls_results = gls_model.fit()
>>> print(gls_results.summary())
""" % {'params': base._model_params_doc,
'extra_params': base._missing_param_doc + base._extra_param_doc}
def __init__(self, endog, exog, sigma=None, missing='none', hasconst=None,
**kwargs):
# TODO: add options igls, for iterative fgls if sigma is None
# TODO: default if sigma is none should be two-step GLS
sigma, cholsigmainv = _get_sigma(sigma, len(endog))
super(GLS, self).__init__(endog, exog, missing=missing,
hasconst=hasconst, sigma=sigma,
cholsigmainv=cholsigmainv, **kwargs)
# store attribute names for data arrays
self._data_attr.extend(['sigma', 'cholsigmainv'])
[docs] def whiten(self, X):
"""
GLS whiten method.
Parameters
----------
X : array-like
Data to be whitened.
Returns
-------
np.dot(cholsigmainv,X)
See Also
--------
regression.GLS
"""
X = np.asarray(X)
if self.sigma is None or self.sigma.shape == ():
return X
elif self.sigma.ndim == 1:
if X.ndim == 1:
return X * self.cholsigmainv
else:
return X * self.cholsigmainv[:, None]
else:
return np.dot(self.cholsigmainv, X)
[docs] def loglike(self, params):
"""
Returns the value of the Gaussian log-likelihood function at params.
Given the whitened design matrix, the log-likelihood is evaluated
at the parameter vector `params` for the dependent variable `endog`.
Parameters
----------
params : array-like
The parameter estimates
Returns
-------
loglike : float
The value of the log-likelihood function for a GLS Model.
Notes
-----
The log-likelihood function for the normal distribution is
.. math:: -\\frac{n}{2}\\log\\left(\\left(Y-\\hat{Y}\\right)^{\\prime}\\left(Y-\\hat{Y}\\right)\\right)-\\frac{n}{2}\\left(1+\\log\\left(\\frac{2\\pi}{n}\\right)\\right)-\\frac{1}{2}\\log\\left(\\left|\\Sigma\\right|\\right)
Y and Y-hat are whitened.
"""
# TODO: combine this with OLS/WLS loglike and add _det_sigma argument
nobs2 = self.nobs / 2.0
SSR = np.sum((self.wendog - np.dot(self.wexog, params))**2, axis=0)
llf = -np.log(SSR) * nobs2 # concentrated likelihood
llf -= (1+np.log(np.pi/nobs2))*nobs2 # with likelihood constant
if np.any(self.sigma):
# FIXME: robust-enough check? unneeded if _det_sigma gets defined
if self.sigma.ndim == 2:
det = np.linalg.slogdet(self.sigma)
llf -= .5*det[1]
else:
llf -= 0.5*np.sum(np.log(self.sigma))
# with error covariance matrix
return llf
[docs] def hessian_factor(self, params, scale=None, observed=True):
"""Weights for calculating Hessian
Parameters
----------
params : ndarray
parameter at which Hessian is evaluated
scale : None or float
If scale is None, then the default scale will be calculated.
Default scale is defined by `self.scaletype` and set in fit.
If scale is not None, then it is used as a fixed scale.
observed : bool
If True, then the observed Hessian is returned. If false then the
expected information matrix is returned.
Returns
-------
hessian_factor : ndarray, 1d
A 1d weight vector used in the calculation of the Hessian.
The hessian is obtained by `(exog.T * hessian_factor).dot(exog)`
"""
if self.sigma is None or self.sigma.shape == ():
return np.ones(self.exog.shape[0])
elif self.sigma.ndim == 1:
return self.cholsigmainv
else:
return np.diag(self.cholsigmainv)
[docs] def fit_regularized(self, method="elastic_net", alpha=0.,
L1_wt=1., start_params=None, profile_scale=False,
refit=False, **kwargs):
# Docstring attached below
# Need to adjust since RSS/n term in elastic net uses nominal
# n in denominator
if self.sigma is not None:
alpha = alpha * np.sum(1 / np.diag(self.sigma)) / len(self.endog)
rslt = OLS(self.wendog, self.wexog).fit_regularized(
method=method, alpha=alpha,
L1_wt=L1_wt,
start_params=start_params,
profile_scale=profile_scale,
refit=refit, **kwargs)
from statsmodels.base.elastic_net import (
RegularizedResults, RegularizedResultsWrapper)
rrslt = RegularizedResults(self, rslt.params)
return RegularizedResultsWrapper(rrslt)
fit_regularized.__doc__ = _fit_regularized_doc
[docs]class WLS(RegressionModel):
__doc__ = """
A regression model with diagonal but non-identity covariance structure.
The weights are presumed to be (proportional to) the inverse of
the variance of the observations. That is, if the variables are
to be transformed by 1/sqrt(W) you must supply weights = 1/W.
%(params)s
weights : array-like, optional
1d array of weights. If you supply 1/W then the variables are
pre- multiplied by 1/sqrt(W). If no weights are supplied the
default value is 1 and WLS results are the same as OLS.
%(extra_params)s
Attributes
----------
weights : array
The stored weights supplied as an argument.
See Also
--------
regression.GLS
Examples
--------
>>> import numpy as np
>>> import statsmodels.api as sm
>>> Y = [1,3,4,5,2,3,4]
>>> X = range(1,8)
>>> X = sm.add_constant(X)
>>> wls_model = sm.WLS(Y,X, weights=list(range(1,8)))
>>> results = wls_model.fit()
>>> results.params
array([ 2.91666667, 0.0952381 ])
>>> results.tvalues
array([ 2.0652652 , 0.35684428])
>>> print(results.t_test([1, 0]))
<T test: effect=array([ 2.91666667]), sd=array([[ 1.41224801]]), t=array([[ 2.0652652]]), p=array([[ 0.04690139]]), df_denom=5>
>>> print(results.f_test([0, 1]))
<F test: F=array([[ 0.12733784]]), p=[[ 0.73577409]], df_denom=5, df_num=1>
Notes
-----
If the weights are a function of the data, then the post estimation
statistics such as fvalue and mse_model might not be correct, as the
package does not yet support no-constant regression.
""" % {'params': base._model_params_doc,
'extra_params': base._missing_param_doc + base._extra_param_doc}
def __init__(self, endog, exog, weights=1., missing='none', hasconst=None,
**kwargs):
weights = np.array(weights)
if weights.shape == ():
if (missing == 'drop' and 'missing_idx' in kwargs and
kwargs['missing_idx'] is not None):
# patsy may have truncated endog
weights = np.repeat(weights, len(kwargs['missing_idx']))
else:
weights = np.repeat(weights, len(endog))
# handle case that endog might be of len == 1
if len(weights) == 1:
weights = np.array([weights.squeeze()])
else:
weights = weights.squeeze()
super(WLS, self).__init__(endog, exog, missing=missing,
weights=weights, hasconst=hasconst, **kwargs)
nobs = self.exog.shape[0]
weights = self.weights
# Experimental normalization of weights
weights = weights / np.sum(weights) * nobs
if weights.size != nobs and weights.shape[0] != nobs:
raise ValueError('Weights must be scalar or same length as design')
[docs] def whiten(self, X):
"""
Whitener for WLS model, multiplies each column by sqrt(self.weights)
Parameters
----------
X : array-like
Data to be whitened
Returns
-------
whitened : array-like
sqrt(weights)*X
"""
X = np.asarray(X)
if X.ndim == 1:
return X * np.sqrt(self.weights)
elif X.ndim == 2:
return np.sqrt(self.weights)[:, None]*X
[docs] def loglike(self, params):
"""
Returns the value of the gaussian log-likelihood function at params.
Given the whitened design matrix, the log-likelihood is evaluated
at the parameter vector `params` for the dependent variable `Y`.
Parameters
----------
params : array-like
The parameter estimates.
Returns
-------
llf : float
The value of the log-likelihood function for a WLS Model.
Notes
--------
.. math:: -\\frac{n}{2}\\log\\left(Y-\\hat{Y}\\right)-\\frac{n}{2}\\left(1+\\log\\left(\\frac{2\\pi}{n}\\right)\\right)-\\frac{1}{2}log\\left(\\left|W\\right|\\right)
where :math:`W` is a diagonal matrix
"""
nobs2 = self.nobs / 2.0
SSR = np.sum((self.wendog - np.dot(self.wexog, params))**2, axis=0)
llf = -np.log(SSR) * nobs2 # concentrated likelihood
llf -= (1+np.log(np.pi/nobs2))*nobs2 # with constant
llf += 0.5 * np.sum(np.log(self.weights))
return llf
[docs] def hessian_factor(self, params, scale=None, observed=True):
"""Weights for calculating Hessian
Parameters
----------
params : ndarray
parameter at which Hessian is evaluated
scale : None or float
If scale is None, then the default scale will be calculated.
Default scale is defined by `self.scaletype` and set in fit.
If scale is not None, then it is used as a fixed scale.
observed : bool
If True, then the observed Hessian is returned. If false then the
expected information matrix is returned.
Returns
-------
hessian_factor : ndarray, 1d
A 1d weight vector used in the calculation of the Hessian.
The hessian is obtained by `(exog.T * hessian_factor).dot(exog)`
"""
return self.weights
[docs] def fit_regularized(self, method="elastic_net", alpha=0.,
L1_wt=1., start_params=None, profile_scale=False,
refit=False, **kwargs):
# Docstring attached below
# Need to adjust since RSS/n in elastic net uses nominal n in
# denominator
alpha = alpha * np.sum(self.weights) / len(self.weights)
rslt = OLS(self.wendog, self.wexog).fit_regularized(
method=method, alpha=alpha,
L1_wt=L1_wt,
start_params=start_params,
profile_scale=profile_scale,
refit=refit, **kwargs)
from statsmodels.base.elastic_net import (
RegularizedResults, RegularizedResultsWrapper)
rrslt = RegularizedResults(self, rslt.params)
return RegularizedResultsWrapper(rrslt)
fit_regularized.__doc__ = _fit_regularized_doc
[docs]class OLS(WLS):
__doc__ = """
A simple ordinary least squares model.
%(params)s
%(extra_params)s
Attributes
----------
weights : scalar
Has an attribute weights = array(1.0) due to inheritance from WLS.
See Also
--------
GLS
Examples
--------
>>> import numpy as np
>>>
>>> import statsmodels.api as sm
>>>
>>> Y = [1,3,4,5,2,3,4]
>>> X = range(1,8)
>>> X = sm.add_constant(X)
>>>
>>> model = sm.OLS(Y,X)
>>> results = model.fit()
>>> results.params
array([ 2.14285714, 0.25 ])
>>> results.tvalues
array([ 1.87867287, 0.98019606])
>>> print(results.t_test([1, 0]))
<T test: effect=array([ 2.14285714]), sd=array([[ 1.14062282]]), t=array([[ 1.87867287]]), p=array([[ 0.05953974]]), df_denom=5>
>>> print(results.f_test(np.identity(2)))
<F test: F=array([[ 19.46078431]]), p=[[ 0.00437251]], df_denom=5, df_num=2>
Notes
-----
No constant is added by the model unless you are using formulas.
""" % {'params': base._model_params_doc,
'extra_params': base._missing_param_doc + base._extra_param_doc}
# TODO: change example to use datasets. This was the point of datasets!
def __init__(self, endog, exog=None, missing='none', hasconst=None,
**kwargs):
super(OLS, self).__init__(endog, exog, missing=missing,
hasconst=hasconst, **kwargs)
if "weights" in self._init_keys:
self._init_keys.remove("weights")
[docs] def loglike(self, params, scale=None):
"""
The likelihood function for the OLS model.
Parameters
----------
params : array-like
The coefficients with which to estimate the log-likelihood.
scale : float or None
If None, return the profile (concentrated) log likelihood
(profiled over the scale parameter), else return the
log-likelihood using the given scale value.
Returns
-------
The likelihood function evaluated at params.
"""
nobs2 = self.nobs / 2.0
nobs = float(self.nobs)
resid = self.endog - np.dot(self.exog, params)
if hasattr(self, 'offset'):
resid -= self.offset
ssr = np.sum(resid**2)
if scale is None:
# profile log likelihood
llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2
else:
# log-likelihood
llf = -nobs2 * np.log(2 * np.pi * scale) - ssr / (2*scale)
return llf
[docs] def whiten(self, Y):
"""
OLS model whitener does nothing: returns Y.
"""
return Y
[docs] def score(self, params, scale=None):
"""
Evaluate the score function at a given point.
The score corresponds to the profile (concentrated)
log-likelihood in which the scale parameter has been profiled
out.
Parameters
----------
params : array-like
The parameter vector at which the score function is
computed.
scale : float or None
If None, return the profile (concentrated) log likelihood
(profiled over the scale parameter), else return the
log-likelihood using the given scale value.
Returns
-------
The score vector.
"""
if not hasattr(self, "_wexog_xprod"):
self._setup_score_hess()
xtxb = np.dot(self._wexog_xprod, params)
sdr = -self._wexog_x_wendog + xtxb
if scale is None:
ssr = self._wendog_xprod - 2 * np.dot(self._wexog_x_wendog.T,
params)
ssr += np.dot(params, xtxb)
return -self.nobs * sdr / ssr
else:
return -sdr / scale
def _setup_score_hess(self):
y = self.wendog
if hasattr(self, 'offset'):
y = y - self.offset
self._wendog_xprod = np.sum(y * y)
self._wexog_xprod = np.dot(self.wexog.T, self.wexog)
self._wexog_x_wendog = np.dot(self.wexog.T, y)
[docs] def hessian(self, params, scale=None):
"""
Evaluate the Hessian function at a given point.
Parameters
----------
params : array-like
The parameter vector at which the Hessian is computed.
scale : float or None
If None, return the profile (concentrated) log likelihood
(profiled over the scale parameter), else return the
log-likelihood using the given scale value.
Returns
-------
The Hessian matrix.
"""
if not hasattr(self, "_wexog_xprod"):
self._setup_score_hess()
xtxb = np.dot(self._wexog_xprod, params)
if scale is None:
ssr = self._wendog_xprod - 2 * np.dot(self._wexog_x_wendog.T,
params)
ssr += np.dot(params, xtxb)
ssrp = -2*self._wexog_x_wendog + 2*xtxb
hm = self._wexog_xprod / ssr - np.outer(ssrp, ssrp) / ssr**2
return -self.nobs * hm / 2
else:
return -self._wexog_xprod / scale
[docs] def hessian_factor(self, params, scale=None, observed=True):
"""Weights for calculating Hessian
Parameters
----------
params : ndarray
parameter at which Hessian is evaluated
scale : None or float
If scale is None, then the default scale will be calculated.
Default scale is defined by `self.scaletype` and set in fit.
If scale is not None, then it is used as a fixed scale.
observed : bool
If True, then the observed Hessian is returned. If false then the
expected information matrix is returned.
Returns
-------
hessian_factor : ndarray, 1d
A 1d weight vector used in the calculation of the Hessian.
The hessian is obtained by `(exog.T * hessian_factor).dot(exog)`
"""
return np.ones(self.exog.shape[0])
[docs] def fit_regularized(self, method="elastic_net", alpha=0.,
L1_wt=1., start_params=None, profile_scale=False,
refit=False, **kwargs):
# Docstring attached below
# In the future we could add support for other penalties, e.g. SCAD.
if method not in ("elastic_net", "sqrt_lasso"):
msg = "Unknown method '%s' for fit_regularized" % method
raise ValueError(msg)
# Set default parameters.
defaults = {"maxiter": 50, "cnvrg_tol": 1e-10,
"zero_tol": 1e-8}
defaults.update(kwargs)
if method == "sqrt_lasso":
from statsmodels.base.elastic_net import (
RegularizedResults, RegularizedResultsWrapper
)
params = self._sqrt_lasso(alpha, refit, defaults["zero_tol"])
results = RegularizedResults(self, params)
return RegularizedResultsWrapper(results)
from statsmodels.base.elastic_net import fit_elasticnet
if L1_wt == 0:
return self._fit_ridge(alpha)
# If a scale parameter is passed in, the non-profile
# likelihood (residual sum of squares divided by -2) is used,
# otherwise the profile likelihood is used.
if profile_scale:
loglike_kwds = {}
score_kwds = {}
hess_kwds = {}
else:
loglike_kwds = {"scale": 1}
score_kwds = {"scale": 1}
hess_kwds = {"scale": 1}
return fit_elasticnet(self, method=method,
alpha=alpha,
L1_wt=L1_wt,
start_params=start_params,
loglike_kwds=loglike_kwds,
score_kwds=score_kwds,
hess_kwds=hess_kwds,
refit=refit,
check_step=False,
**defaults)
fit_regularized.__doc__ = _fit_regularized_doc
def _sqrt_lasso(self, alpha, refit, zero_tol):
try:
import cvxopt
except ImportError:
msg = "sqrt_lasso fitting requires the cvxopt module to be installed"
raise ValueError(msg)
n = len(self.endog)
p = self.exog.shape[1]
h0 = cvxopt.matrix(0., (2*p+1, 1))
h1 = cvxopt.matrix(0., (n+1, 1))
h1[1:, 0] = cvxopt.matrix(self.endog, (n, 1))
G0 = cvxopt.spmatrix([], [], [], (2*p+1, 2*p+1))
for i in range(1, 2*p+1):
G0[i, i] = -1
G1 = cvxopt.matrix(0., (n+1, 2*p+1))
G1[0, 0] = -1
G1[1:, 1:p+1] = self.exog
G1[1:, p+1:] = -self.exog
c = cvxopt.matrix(alpha / n, (2*p + 1, 1))
c[0] = 1 / np.sqrt(n)
from cvxopt import solvers
solvers.options["show_progress"] = False
rslt = solvers.socp(c, Gl=G0, hl=h0, Gq=[G1], hq=[h1])
x = np.asarray(rslt['x']).flat
bp = x[1:p+1]
bn = x[p+1:]
params = bp - bn
if not refit:
return params
ii = np.flatnonzero(np.abs(params) > zero_tol)
rfr = OLS(self.endog, self.exog[:, ii]).fit()
params *= 0
params[ii] = rfr.params
return params
def _fit_ridge(self, alpha):
"""
Fit a linear model using ridge regression.
Parameters
----------
alpha : scalar or array-like
The penalty weight. If a scalar, the same penalty weight
applies to all variables in the model. If a vector, it
must have the same length as `params`, and contains a
penalty weight for each coefficient.
Notes
-----
Equivalent to fit_regularized with L1_wt = 0 (but implemented
more efficiently).
"""
u, s, vt = np.linalg.svd(self.exog, 0)
v = vt.T
q = np.dot(u.T, self.endog) * s
s2 = s * s
if np.isscalar(alpha):
sd = s2 + alpha * self.nobs
params = q / sd
params = np.dot(v, params)
else:
vtav = self.nobs * np.dot(vt, alpha[:, None] * v)
d = np.diag(vtav) + s2
np.fill_diagonal(vtav, d)
r = np.linalg.solve(vtav, q)
params = np.dot(v, r)
from statsmodels.base.elastic_net import RegularizedResults
return RegularizedResults(self, params)
[docs]class GLSAR(GLS):
__doc__ = """
A regression model with an AR(p) covariance structure.
%(params)s
rho : int
Order of the autoregressive covariance
%(extra_params)s
Examples
--------
>>> import statsmodels.api as sm
>>> X = range(1,8)
>>> X = sm.add_constant(X)
>>> Y = [1,3,4,5,8,10,9]
>>> model = sm.GLSAR(Y, X, rho=2)
>>> for i in range(6):
... results = model.fit()
... print("AR coefficients: {0}".format(model.rho))
... rho, sigma = sm.regression.yule_walker(results.resid,
... order=model.order)
... model = sm.GLSAR(Y, X, rho)
...
AR coefficients: [ 0. 0.]
AR coefficients: [-0.52571491 -0.84496178]
AR coefficients: [-0.6104153 -0.86656458]
AR coefficients: [-0.60439494 -0.857867 ]
AR coefficients: [-0.6048218 -0.85846157]
AR coefficients: [-0.60479146 -0.85841922]
>>> results.params
array([-0.66661205, 1.60850853])
>>> results.tvalues
array([ -2.10304127, 21.8047269 ])
>>> print(results.t_test([1, 0]))
<T test: effect=array([-0.66661205]), sd=array([[ 0.31697526]]), t=array([[-2.10304127]]), p=array([[ 0.06309969]]), df_denom=3>
>>> print(results.f_test(np.identity(2)))
<F test: F=array([[ 1815.23061844]]), p=[[ 0.00002372]], df_denom=3, df_num=2>
Or, equivalently
>>> model2 = sm.GLSAR(Y, X, rho=2)
>>> res = model2.iterative_fit(maxiter=6)
>>> model2.rho
array([-0.60479146, -0.85841922])
Notes
-----
GLSAR is considered to be experimental.
The linear autoregressive process of order p--AR(p)--is defined as:
TODO
""" % {'params': base._model_params_doc,
'extra_params': base._missing_param_doc + base._extra_param_doc}
def __init__(self, endog, exog=None, rho=1, missing='none', **kwargs):
# this looks strange, interpreting rho as order if it is int
if isinstance(rho, np.int):
self.order = rho
self.rho = np.zeros(self.order, np.float64)
else:
self.rho = np.squeeze(np.asarray(rho))
if len(self.rho.shape) not in [0, 1]:
raise ValueError("AR parameters must be a scalar or a vector")
if self.rho.shape == ():
self.rho.shape = (1,)
self.order = self.rho.shape[0]
if exog is None:
# JP this looks wrong, should be a regression on constant
# results for rho estimate now identical to yule-walker on y
# super(AR, self).__init__(endog, add_constant(endog))
super(GLSAR, self).__init__(endog, np.ones((endog.shape[0], 1)),
missing=missing, **kwargs)
else:
super(GLSAR, self).__init__(endog, exog, missing=missing,
**kwargs)
[docs] def iterative_fit(self, maxiter=3, rtol=1e-4, **kwds):
"""
Perform an iterative two-stage procedure to estimate a GLS model.
The model is assumed to have AR(p) errors, AR(p) parameters and
regression coefficients are estimated iteratively.
Parameters
----------
maxiter : integer, optional
the number of iterations
rtol : float, optional
Relative tolerance between estimated coefficients to stop the
estimation. Stops if
max(abs(last - current) / abs(last)) < rtol
"""
# TODO: update this after going through example.
converged = False
i = -1 # need to initialize for maxiter < 1 (skip loop)
history = {'params': [], 'rho': [self.rho]}
for i in range(maxiter - 1):
if hasattr(self, 'pinv_wexog'):
del self.pinv_wexog
self.initialize()
results = self.fit()
history['params'].append(results.params)
if i == 0:
last = results.params
else:
diff = np.max(np.abs(last - results.params) / np.abs(last))
if diff < rtol:
converged = True
break
last = results.params
self.rho, _ = yule_walker(results.resid,
order=self.order, df=None)
history['rho'].append(self.rho)
# why not another call to self.initialize
# Use kwarg to insert history
if not converged and maxiter > 0:
# maxiter <= 0 just does OLS
if hasattr(self, 'pinv_wexog'):
del self.pinv_wexog
self.initialize()
# if converged then this is a duplicate fit, because we didn't
# update rho
results = self.fit(history=history, **kwds)
results.iter = i + 1
# add last fit to history, not if duplicate fit
if not converged:
results.history['params'].append(results.params)
results.iter += 1
results.converged = converged
return results
[docs] def whiten(self, X):
"""
Whiten a series of columns according to an AR(p)
covariance structure. This drops initial p observations.
Parameters
----------
X : array-like
The data to be whitened,
Returns
-------
whitened array
"""
# TODO: notation for AR process
X = np.asarray(X, np.float64)
_X = X.copy()
# the following loops over the first axis, works for 1d and nd
for i in range(self.order):
_X[(i+1):] = _X[(i+1):] - self.rho[i] * X[0:-(i+1)]
return _X[self.order:]
[docs]def yule_walker(X, order=1, method="unbiased", df=None, inv=False,
demean=True):
"""
Estimate AR(p) parameters from a sequence X using Yule-Walker equation.
Unbiased or maximum-likelihood estimator (mle)
See, for example:
http://en.wikipedia.org/wiki/Autoregressive_moving_average_model
Parameters
----------
X : array-like
1d array
order : integer, optional
The order of the autoregressive process. Default is 1.
method : string, optional
Method can be 'unbiased' or 'mle' and this determines
denominator in estimate of autocorrelation function (ACF) at
lag k. If 'mle', the denominator is n=X.shape[0], if 'unbiased'
the denominator is n-k. The default is unbiased.
df : integer, optional
Specifies the degrees of freedom. If `df` is supplied, then it
is assumed the X has `df` degrees of freedom rather than `n`.
Default is None.
inv : bool
If inv is True the inverse of R is also returned. Default is
False.
demean : bool
True, the mean is subtracted from `X` before estimation.
Returns
-------
rho
The autoregressive coefficients
sigma
TODO
Examples
--------
>>> import statsmodels.api as sm
>>> from statsmodels.datasets.sunspots import load
>>> data = load(as_pandas=False)
>>> rho, sigma = sm.regression.yule_walker(data.endog,
... order=4, method="mle")
>>> rho
array([ 1.28310031, -0.45240924, -0.20770299, 0.04794365])
>>> sigma
16.808022730464351
"""
# TODO: define R better, look back at notes and technical notes on YW.
# First link here is useful
# http://www-stat.wharton.upenn.edu/~steele/Courses/956/ResourceDetails/YuleWalkerAndMore.htm
method = str(method).lower()
if method not in ["unbiased", "mle"]:
raise ValueError("ACF estimation method must be 'unbiased' or 'MLE'")
X = np.array(X, dtype=np.float64)
if demean:
X -= X.mean() # automatically demean's X
n = df or X.shape[0]
if method == "unbiased": # this is df_resid ie., n - p
denom = lambda k: n - k
else:
denom = lambda k: n
if X.ndim > 1 and X.shape[1] != 1:
raise ValueError("expecting a vector to estimate AR parameters")
r = np.zeros(order+1, np.float64)
r[0] = (X**2).sum() / denom(0)
for k in range(1, order+1):
r[k] = (X[0:-k] * X[k:]).sum() / denom(k)
R = toeplitz(r[:-1])
rho = np.linalg.solve(R, r[1:])
sigmasq = r[0] - (r[1:]*rho).sum()
if inv:
return rho, np.sqrt(sigmasq), np.linalg.inv(R)
else:
return rho, np.sqrt(sigmasq)
[docs]def burg(endog, order=1, demean=True):
"""
Burg's AP(p) parameter estimator
Parameters
----------
endog : array-like
The endogenous variable
order : int, optional
Order of the AR. Default is 1.
demean : bool, optional
Flag indicating to subtract the mean from endog before estimation
Returns
-------
rho : ndarray
AR(p) coefficients computed using Burg's algorithm
sigma2 : float
Estimate of the residual variance
Notes
-----
AR model estimated includes a constant estimated using the sample mean.
This value is not reported.
References
----------
.. [1] Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series
and forecasting. Springer.
"""
# Avoid circular imports
from statsmodels.tsa.stattools import levinson_durbin_pacf, pacf_burg
endog = np.squeeze(np.asarray(endog))
if endog.ndim != 1:
raise ValueError('endog must be 1-d or squeezable to 1-d.')
order = int(order)
if order < 1:
raise ValueError('order must be an integer larger than 1')
if demean:
endog = endog - endog.mean()
pacf, sigma = pacf_burg(endog, order, demean=demean)
ar, _ = levinson_durbin_pacf(pacf)
return ar, sigma[-1]
[docs]class RegressionResults(base.LikelihoodModelResults):
r"""
This class summarizes the fit of a linear regression model.
It handles the output of contrasts, estimates of covariance, etc.
Attributes
----------
pinv_wexog
See specific model class docstring
cov_HC0
Heteroscedasticity robust covariance matrix. See HC0_se below.
cov_HC1
Heteroscedasticity robust covariance matrix. See HC1_se below.
cov_HC2
Heteroscedasticity robust covariance matrix. See HC2_se below.
cov_HC3
Heteroscedasticity robust covariance matrix. See HC3_se below.
cov_type
Parameter covariance estimator used for standard errors and t-stats
df_model
Model degrees of freedom. The number of regressors `p`. Does not
include the constant if one is present
df_resid
Residual degrees of freedom. `n - p - 1`, if a constant is present.
`n - p` if a constant is not included.
het_scale
adjusted squared residuals for heteroscedasticity robust standard
errors. Is only available after `HC#_se` or `cov_HC#` is called.
See HC#_se for more information.
history
Estimation history for iterative estimators
HC0_se
White's (1980) heteroskedasticity robust standard errors.
Defined as sqrt(diag(X.T X)^(-1)X.T diag(e_i^(2)) X(X.T X)^(-1)
where e_i = resid[i]
HC0_se is a cached property.
When HC0_se or cov_HC0 is called the RegressionResults instance will
then have another attribute `het_scale`, which is in this case is just
resid**2.
HC1_se
MacKinnon and White's (1985) alternative heteroskedasticity robust
standard errors.
Defined as sqrt(diag(n/(n-p)*HC_0)
HC1_see is a cached property.
When HC1_se or cov_HC1 is called the RegressionResults instance will
then have another attribute `het_scale`, which is in this case is
n/(n-p)*resid**2.
HC2_se
MacKinnon and White's (1985) alternative heteroskedasticity robust
standard errors.
Defined as (X.T X)^(-1)X.T diag(e_i^(2)/(1-h_ii)) X(X.T X)^(-1)
where h_ii = x_i(X.T X)^(-1)x_i.T
HC2_see is a cached property.
When HC2_se or cov_HC2 is called the RegressionResults instance will
then have another attribute `het_scale`, which is in this case is
resid^(2)/(1-h_ii).
HC3_se
MacKinnon and White's (1985) alternative heteroskedasticity robust
standard errors.
Defined as (X.T X)^(-1)X.T diag(e_i^(2)/(1-h_ii)^(2)) X(X.T X)^(-1)
where h_ii = x_i(X.T X)^(-1)x_i.T
HC3_see is a cached property.
When HC3_se or cov_HC3 is called the RegressionResults instance will
then have another attribute `het_scale`, which is in this case is
resid^(2)/(1-h_ii)^(2).
model
A pointer to the model instance that called fit() or results.
params
The linear coefficients that minimize the least squares
criterion. This is usually called Beta for the classical
linear model.
resid_pearson
`wresid` normalized to have unit variance.
"""
_cache = {} # needs to be a class attribute for scale setter?
def __init__(self, model, params, normalized_cov_params=None, scale=1.,
cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs):
super(RegressionResults, self).__init__(
model, params, normalized_cov_params, scale)
self._cache = {}
if hasattr(model, 'wexog_singular_values'):
self._wexog_singular_values = model.wexog_singular_values
else:
self._wexog_singular_values = None
self.df_model = model.df_model
self.df_resid = model.df_resid
if cov_type == 'nonrobust':
self.cov_type = 'nonrobust'
self.cov_kwds = {
'description': 'Standard Errors assume that the ' +
'covariance matrix of the errors is correctly ' +
'specified.'}
if use_t is None:
use_t = True # TODO: class default
self.use_t = use_t
else:
if cov_kwds is None:
cov_kwds = {}
if 'use_t' in cov_kwds:
# TODO: we want to get rid of 'use_t' in cov_kwds
use_t_2 = cov_kwds.pop('use_t')
if use_t is None:
use_t = use_t_2
# TODO: warn or not?
self.get_robustcov_results(cov_type=cov_type, use_self=True,
use_t=use_t, **cov_kwds)
for key in kwargs:
setattr(self, key, kwargs[key])
def __str__(self):
self.summary()
[docs] def conf_int(self, alpha=.05, cols=None):
"""
Returns the confidence interval of the fitted parameters.
Parameters
----------
alpha : float, optional
The `alpha` level for the confidence interval.
ie., The default `alpha` = .05 returns a 95% confidence interval.
cols : array-like, optional
`cols` specifies which confidence intervals to return
Notes
-----
The confidence interval is based on Student's t-distribution.
"""
# keep method for docstring for now
ci = super(RegressionResults, self).conf_int(alpha=alpha, cols=cols)
return ci
[docs] @cache_readonly
def nobs(self):
"""Number of observations n."""
return float(self.model.wexog.shape[0])
[docs] @cache_readonly
def fittedvalues(self):
"""The predicted values for the original (unwhitened) design."""
return self.model.predict(self.params, self.model.exog)
[docs] @cache_readonly
def wresid(self):
"""
The residuals of the transformed/whitened regressand and regressor(s)
"""
return self.model.wendog - self.model.predict(
self.params, self.model.wexog)
[docs] @cache_readonly
def resid(self):
"""The residuals of the model."""
return self.model.endog - self.model.predict(
self.params, self.model.exog)
# TODO: fix writable example
[docs] @cache_writable()
def scale(self):
"""
A scale factor for the covariance matrix. Default value is
ssr/(n-p). Note that the square root of `scale` is often
called the standard error of the regression.
"""
wresid = self.wresid
return np.dot(wresid, wresid) / self.df_resid
[docs] @cache_readonly
def ssr(self):
"""Sum of squared (whitened) residuals."""
wresid = self.wresid
return np.dot(wresid, wresid)
[docs] @cache_readonly
def centered_tss(self):
"""The total (weighted) sum of squares centered about the mean."""
model = self.model
weights = getattr(model, 'weights', None)
sigma = getattr(model, 'sigma', None)
if weights is not None:
mean = np.average(model.endog, weights=weights)
return np.sum(weights * (model.endog - mean)**2)
elif sigma is not None:
# Exactly matches WLS when sigma is diagonal
iota = np.ones_like(model.endog)
iota = model.whiten(iota)
mean = model.wendog.dot(iota) / iota.dot(iota)
err = model.endog - mean
err = model.whiten(err)
return np.sum(err**2)
else:
centered_endog = model.wendog - model.wendog.mean()
return np.dot(centered_endog, centered_endog)
[docs] @cache_readonly
def uncentered_tss(self):
"""
Uncentered sum of squares. Sum of the squared values of the
(whitened) endogenous response variable.
"""
wendog = self.model.wendog
return np.dot(wendog, wendog)
[docs] @cache_readonly
def ess(self):
"""Explained sum of squares. If a constant is present, the centered
total sum of squares minus the sum of squared residuals. If there is no
constant, the uncentered total sum of squares is used."""
if self.k_constant:
return self.centered_tss - self.ssr
else:
return self.uncentered_tss - self.ssr
[docs] @cache_readonly
def rsquared(self):
"""
R-squared of a model with an intercept. This is defined here
as 1 - `ssr`/`centered_tss` if the constant is included in the
model and 1 - `ssr`/`uncentered_tss` if the constant is
omitted.
"""
if self.k_constant:
return 1 - self.ssr/self.centered_tss
else:
return 1 - self.ssr/self.uncentered_tss
[docs] @cache_readonly
def rsquared_adj(self):
"""
Adjusted R-squared. This is defined here as 1 -
(`nobs`-1)/`df_resid` * (1-`rsquared`) if a constant is
included and 1 - `nobs`/`df_resid` * (1-`rsquared`) if no
constant is included.
"""
return 1 - (np.divide(self.nobs - self.k_constant, self.df_resid)
* (1 - self.rsquared))
[docs] @cache_readonly
def mse_model(self):
"""
Mean squared error the model. This is the explained sum of
squares divided by the model degrees of freedom.
"""
return self.ess/self.df_model
[docs] @cache_readonly
def mse_resid(self):
"""
Mean squared error of the residuals. The sum of squared
residuals divided by the residual degrees of freedom.
"""
return self.ssr/self.df_resid
[docs] @cache_readonly
def mse_total(self):
"""
Total mean squared error. Defined as the uncentered total sum
of squares divided by n the number of observations.
"""
if self.k_constant:
return self.centered_tss / (self.df_resid + self.df_model)
else:
return self.uncentered_tss / (self.df_resid + self.df_model)
[docs] @cache_readonly
def fvalue(self):
"""F-statistic of the fully specified model. Calculated as the mean
squared error of the model divided by the mean squared error of the
residuals."""
if hasattr(self, 'cov_type') and self.cov_type != 'nonrobust':
# with heteroscedasticity or correlation robustness
k_params = self.normalized_cov_params.shape[0]
mat = np.eye(k_params)
const_idx = self.model.data.const_idx
# TODO: What if model includes implicit constant, e.g. all
# dummies but no constant regressor?
# TODO: Restats as LM test by projecting orthogonalizing
# to constant?
if self.model.data.k_constant == 1:
# if constant is implicit, return nan see #2444
if const_idx is None:
return np.nan
idx = lrange(k_params)
idx.pop(const_idx)
mat = mat[idx] # remove constant
if mat.size == 0: # see #3642
return np.nan
ft = self.f_test(mat)
# using backdoor to set another attribute that we already have
self._cache['f_pvalue'] = ft.pvalue
return ft.fvalue
else:
# for standard homoscedastic case
return self.mse_model/self.mse_resid
[docs] @cache_readonly
def f_pvalue(self):
"""p-value of the F-statistic"""
return stats.f.sf(self.fvalue, self.df_model, self.df_resid)
[docs] @cache_readonly
def bse(self):
"""The standard errors of the parameter estimates."""
return np.sqrt(np.diag(self.cov_params()))
[docs] @cache_readonly
def aic(self):
r"""Akaike's information criteria. For a model with a constant
:math:`-2llf + 2(df\_model + 1)`. For a model without a constant
:math:`-2llf + 2(df\_model)`."""
return -2 * self.llf + 2 * (self.df_model + self.k_constant)
[docs] @cache_readonly
def bic(self):
r"""Bayes' information criteria. For a model with a constant
:math:`-2llf + \log(n)(df\_model+1)`. For a model without a constant
:math:`-2llf + \log(n)(df\_model)`"""
return (-2 * self.llf + np.log(self.nobs) * (self.df_model +
self.k_constant))
[docs] @cache_readonly
def eigenvals(self):
"""
Return eigenvalues sorted in decreasing order.
"""
if self._wexog_singular_values is not None:
eigvals = self._wexog_singular_values ** 2
else:
eigvals = np.linalg.linalg.eigvalsh(np.dot(self.model.wexog.T,
self.model.wexog))
return np.sort(eigvals)[::-1]
[docs] @cache_readonly
def condition_number(self):
"""
Return condition number of exogenous matrix.
Calculated as ratio of largest to smallest eigenvalue.
"""
eigvals = self.eigenvals
return np.sqrt(eigvals[0]/eigvals[-1])
# TODO: make these properties reset bse
def _HCCM(self, scale):
H = np.dot(self.model.pinv_wexog,
scale[:, None] * self.model.pinv_wexog.T)
return H
[docs] @cache_readonly
def cov_HC0(self):
"""
See statsmodels.RegressionResults
"""
self.het_scale = self.wresid**2
cov_HC0 = self._HCCM(self.het_scale)
return cov_HC0
[docs] @cache_readonly
def cov_HC1(self):
"""
See statsmodels.RegressionResults
"""
self.het_scale = self.nobs/(self.df_resid)*(self.wresid**2)
cov_HC1 = self._HCCM(self.het_scale)
return cov_HC1
[docs] @cache_readonly
def cov_HC2(self):
"""
See statsmodels.RegressionResults
"""
# probably could be optimized
h = np.diag(chain_dot(self.model.wexog,
self.normalized_cov_params,
self.model.wexog.T))
self.het_scale = self.wresid**2/(1-h)
cov_HC2 = self._HCCM(self.het_scale)
return cov_HC2
[docs] @cache_readonly
def cov_HC3(self):
"""
See statsmodels.RegressionResults
"""
h = np.diag(chain_dot(
self.model.wexog, self.normalized_cov_params, self.model.wexog.T))
self.het_scale = (self.wresid / (1 - h))**2
cov_HC3 = self._HCCM(self.het_scale)
return cov_HC3
[docs] @cache_readonly
def HC0_se(self):
"""
See statsmodels.RegressionResults
"""
return np.sqrt(np.diag(self.cov_HC0))
[docs] @cache_readonly
def HC1_se(self):
"""
See statsmodels.RegressionResults
"""
return np.sqrt(np.diag(self.cov_HC1))
[docs] @cache_readonly
def HC2_se(self):
"""
See statsmodels.RegressionResults
"""
return np.sqrt(np.diag(self.cov_HC2))
[docs] @cache_readonly
def HC3_se(self):
"""
See statsmodels.RegressionResults
"""
return np.sqrt(np.diag(self.cov_HC3))
[docs] @cache_readonly
def resid_pearson(self):
"""
Residuals, normalized to have unit variance.
Returns
-------
An array wresid standardized by the sqrt if scale
"""
if not hasattr(self, 'resid'):
raise ValueError('Method requires residuals.')
eps = np.finfo(self.wresid.dtype).eps
if np.sqrt(self.scale) < 10 * eps * self.model.endog.mean():
# don't divide if scale is zero close to numerical precision
from warnings import warn
warn("All residuals are 0, cannot compute normed residuals.",
RuntimeWarning)
return self.wresid
else:
return self.wresid / np.sqrt(self.scale)
def _is_nested(self, restricted):
"""
Parameters
----------
restricted : Result instance
The restricted model is assumed to be nested in the current
model. The result instance of the restricted model is required to
have two attributes, residual sum of squares, `ssr`, residual
degrees of freedom, `df_resid`.
Returns
-------
nested : bool
True if nested, otherwise false
Notes
-----
A most nests another model if the regressors in the smaller
model are spanned by the regressors in the larger model and
the regressand is identical.
"""
if self.model.nobs != restricted.model.nobs:
return False
full_rank = self.model.rank
restricted_rank = restricted.model.rank
if full_rank <= restricted_rank:
return False
restricted_exog = restricted.model.wexog
full_wresid = self.wresid
scores = restricted_exog * full_wresid[:, None]
score_l2 = np.sqrt(np.mean(scores.mean(0) ** 2))
# TODO: Could be improved, and may fail depending on scale of
# regressors
return np.allclose(score_l2, 0)
[docs] def compare_lm_test(self, restricted, demean=True, use_lr=False):
"""Use Lagrange Multiplier test to test whether restricted model is correct
Parameters
----------
restricted : Result instance
The restricted model is assumed to be nested in the
current model. The result instance of the restricted model
is required to have two attributes, residual sum of
squares, `ssr`, residual degrees of freedom, `df_resid`.
demean : bool
Flag indicating whether the demean the scores based on the
residuals from the restricted model. If True, the
covariance of the scores are used and the LM test is
identical to the large sample version of the LR test.
Returns
-------
lm_value : float
test statistic, chi2 distributed
p_value : float
p-value of the test statistic
df_diff : int
degrees of freedom of the restriction, i.e. difference in df
between models
Notes
-----
TODO: explain LM text
"""
import statsmodels.stats.sandwich_covariance as sw
from numpy.linalg import inv
if not self._is_nested(restricted):
raise ValueError("Restricted model is not nested by full model.")
wresid = restricted.wresid
wexog = self.model.wexog
scores = wexog * wresid[:, None]
n = self.nobs
df_full = self.df_resid
df_restr = restricted.df_resid
df_diff = (df_restr - df_full)
s = scores.mean(axis=0)
if use_lr:
scores = wexog * self.wresid[:, None]
demean = False
if demean:
scores = scores - scores.mean(0)[None, :]
# Form matters here. If homoskedastics can be sigma^2 (X'X)^-1
# If Heteroskedastic then the form below is fine
# If HAC then need to use HAC
# If Cluster, shoudl use cluster
cov_type = getattr(self, 'cov_type', 'nonrobust')
if cov_type == 'nonrobust':
sigma2 = np.mean(wresid**2)
XpX = np.dot(wexog.T, wexog) / n
Sinv = inv(sigma2 * XpX)
elif cov_type in ('HC0', 'HC1', 'HC2', 'HC3'):
Sinv = inv(np.dot(scores.T, scores) / n)
elif cov_type == 'HAC':
maxlags = self.cov_kwds['maxlags']
Sinv = inv(sw.S_hac_simple(scores, maxlags) / n)
elif cov_type == 'cluster':
# cluster robust standard errors
groups = self.cov_kwds['groups']
# TODO: Might need demean option in S_crosssection by group?
Sinv = inv(sw.S_crosssection(scores, groups))
else:
raise ValueError('Only nonrobust, HC, HAC and cluster are ' +
'currently connected')
lm_value = n * chain_dot(s, Sinv, s.T)
p_value = stats.chi2.sf(lm_value, df_diff)
return lm_value, p_value, df_diff
[docs] def compare_f_test(self, restricted):
"""use F test to test whether restricted model is correct
Parameters
----------
restricted : Result instance
The restricted model is assumed to be nested in the
current model. The result instance of the restricted model
is required to have two attributes, residual sum of
squares, `ssr`, residual degrees of freedom, `df_resid`.
Returns
-------
f_value : float
test statistic, F distributed
p_value : float
p-value of the test statistic
df_diff : int
degrees of freedom of the restriction, i.e. difference in
df between models
Notes
-----
See mailing list discussion October 17,
This test compares the residual sum of squares of the two
models. This is not a valid test, if there is unspecified
heteroscedasticity or correlation. This method will issue a
warning if this is detected but still return the results under
the assumption of homoscedasticity and no autocorrelation
(sphericity).
"""
has_robust1 = getattr(self, 'cov_type', 'nonrobust') != 'nonrobust'
has_robust2 = (getattr(restricted, 'cov_type', 'nonrobust') !=
'nonrobust')
if has_robust1 or has_robust2:
warnings.warn('F test for comparison is likely invalid with ' +
'robust covariance, proceeding anyway',
InvalidTestWarning)
ssr_full = self.ssr
ssr_restr = restricted.ssr
df_full = self.df_resid
df_restr = restricted.df_resid
df_diff = (df_restr - df_full)
f_value = (ssr_restr - ssr_full) / df_diff / ssr_full * df_full
p_value = stats.f.sf(f_value, df_diff, df_full)
return f_value, p_value, df_diff
[docs] def compare_lr_test(self, restricted, large_sample=False):
"""
Likelihood ratio test to test whether restricted model is correct
Parameters
----------
restricted : Result instance
The restricted model is assumed to be nested in the current model.
The result instance of the restricted model is required to have two
attributes, residual sum of squares, `ssr`, residual degrees of
freedom, `df_resid`.
large_sample : bool
Flag indicating whether to use a heteroskedasticity robust version
of the LR test, which is a modified LM test.
Returns
-------
lr_stat : float
likelihood ratio, chisquare distributed with df_diff degrees of
freedom
p_value : float
p-value of the test statistic
df_diff : int
degrees of freedom of the restriction, i.e. difference in df
between models
Notes
-----
The exact likelihood ratio is valid for homoskedastic data,
and is defined as
.. math:: D=-2\\log\\left(\\frac{\\mathcal{L}_{null}}
{\\mathcal{L}_{alternative}}\\right)
where :math:`\\mathcal{L}` is the likelihood of the
model. With :math:`D` distributed as chisquare with df equal
to difference in number of parameters or equivalently
difference in residual degrees of freedom.
The large sample version of the likelihood ratio is defined as
.. math:: D=n s^{\\prime}S^{-1}s
where :math:`s=n^{-1}\\sum_{i=1}^{n} s_{i}`
.. math:: s_{i} = x_{i,alternative} \\epsilon_{i,null}
is the average score of the model evaluated using the
residuals from null model and the regressors from the
alternative model and :math:`S` is the covariance of the
scores, :math:`s_{i}`. The covariance of the scores is
estimated using the same estimator as in the alternative
model.
This test compares the loglikelihood of the two models. This
may not be a valid test, if there is unspecified
heteroscedasticity or correlation. This method will issue a
warning if this is detected but still return the results
without taking unspecified heteroscedasticity or correlation
into account.
This test compares the loglikelihood of the two models. This
may not be a valid test, if there is unspecified
heteroscedasticity or correlation. This method will issue a
warning if this is detected but still return the results
without taking unspecified heteroscedasticity or correlation
into account.
is the average score of the model evaluated using the
residuals from null model and the regressors from the
alternative model and :math:`S` is the covariance of the
scores, :math:`s_{i}`. The covariance of the scores is
estimated using the same estimator as in the alternative
model.
TODO: put into separate function, needs tests
"""
# See mailing list discussion October 17,
if large_sample:
return self.compare_lm_test(restricted, use_lr=True)
has_robust1 = (getattr(self, 'cov_type', 'nonrobust') != 'nonrobust')
has_robust2 = (
getattr(restricted, 'cov_type', 'nonrobust') != 'nonrobust')
if has_robust1 or has_robust2:
warnings.warn('Likelihood Ratio test is likely invalid with ' +
'robust covariance, proceeding anyway',
InvalidTestWarning)
llf_full = self.llf
llf_restr = restricted.llf
df_full = self.df_resid
df_restr = restricted.df_resid
lrdf = (df_restr - df_full)
lrstat = -2*(llf_restr - llf_full)
lr_pvalue = stats.chi2.sf(lrstat, lrdf)
return lrstat, lr_pvalue, lrdf
[docs] def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwds):
"""create new results instance with robust covariance as default
Parameters
----------
cov_type : string
the type of robust sandwich estimator to use. see Notes below
use_t : bool
If true, then the t distribution is used for inference.
If false, then the normal distribution is used.
If `use_t` is None, then an appropriate default is used, which is
`true` if the cov_type is nonrobust, and `false` in all other
cases.
kwds : depends on cov_type
Required or optional arguments for robust covariance calculation.
see Notes below
Returns
-------
results : results instance
This method creates a new results instance with the
requested robust covariance as the default covariance of
the parameters. Inferential statistics like p-values and
hypothesis tests will be based on this covariance matrix.
Notes
-----
The following covariance types and required or optional arguments are
currently available:
- 'fixed scale' and optional keyword argument 'scale' which uses
a predefined scale estimate with default equal to one.
- 'HC0', 'HC1', 'HC2', 'HC3' and no keyword arguments:
heteroscedasticity robust covariance
- 'HAC' and keywords
- `maxlag` integer (required) : number of lags to use
- `kernel` callable or str (optional) : kernel
currently available kernels are ['bartlett', 'uniform'],
default is Bartlett
- `use_correction` bool (optional) : If true, use small sample
correction
- 'cluster' and required keyword `groups`, integer group indicator
- `groups` array_like, integer (required) :
index of clusters or groups
- `use_correction` bool (optional) :
If True the sandwich covariance is calculated with a small
sample correction.
If False the sandwich covariance is calculated without
small sample correction.
- `df_correction` bool (optional)
If True (default), then the degrees of freedom for the
inferential statistics and hypothesis tests, such as
pvalues, f_pvalue, conf_int, and t_test and f_test, are
based on the number of groups minus one instead of the
total number of observations minus the number of explanatory
variables. `df_resid` of the results instance is adjusted.
If False, then `df_resid` of the results instance is not
adjusted.
- 'hac-groupsum' Driscoll and Kraay, heteroscedasticity and
autocorrelation robust standard errors in panel data
keywords
- `time` array_like (required) : index of time periods
- `maxlag` integer (required) : number of lags to use
- `kernel` callable or str (optional) : kernel
currently available kernels are ['bartlett', 'uniform'],
default is Bartlett
- `use_correction` False or string in ['hac', 'cluster'] (optional) :
If False the the sandwich covariance is calulated without
small sample correction.
If `use_correction = 'cluster'` (default), then the same
small sample correction as in the case of 'covtype='cluster''
is used.
- `df_correction` bool (optional)
adjustment to df_resid, see cov_type 'cluster' above
# TODO: we need more options here
- 'hac-panel' heteroscedasticity and autocorrelation robust standard
errors in panel data.
The data needs to be sorted in this case, the time series
for each panel unit or cluster need to be stacked. The
membership to a timeseries of an individual or group can
be either specified by group indicators or by increasing
time periods.
keywords
- either `groups` or `time` : array_like (required)
`groups` : indicator for groups
`time` : index of time periods
- `maxlag` integer (required) : number of lags to use
- `kernel` callable or str (optional) : kernel
currently available kernels are ['bartlett', 'uniform'],
default is Bartlett
- `use_correction` False or string in ['hac', 'cluster'] (optional) :
If False the sandwich covariance is calculated without
small sample correction.
- `df_correction` bool (optional)
adjustment to df_resid, see cov_type 'cluster' above
# TODO: we need more options here
Reminder:
`use_correction` in "hac-groupsum" and "hac-panel" is not bool,
needs to be in [False, 'hac', 'cluster']
TODO: Currently there is no check for extra or misspelled keywords,
except in the case of cov_type `HCx`
"""
import statsmodels.stats.sandwich_covariance as sw
from statsmodels.base.covtype import normalize_cov_type, descriptions
cov_type = normalize_cov_type(cov_type)
if 'kernel' in kwds:
kwds['weights_func'] = kwds.pop('kernel')
if 'weights_func' in kwds and not callable(kwds['weights_func']):
kwds['weights_func'] = sw.kernel_dict[kwds['weights_func']]
# TODO: make separate function that returns a robust cov plus info
use_self = kwds.pop('use_self', False)
if use_self:
res = self
else:
res = self.__class__(
self.model, self.params,
normalized_cov_params=self.normalized_cov_params,
scale=self.scale)
res.cov_type = cov_type
# use_t might already be defined by the class, and already set
if use_t is None:
use_t = self.use_t
res.cov_kwds = {'use_t': use_t} # store for information
res.use_t = use_t
adjust_df = False
if cov_type in ['cluster', 'hac-panel', 'hac-groupsum']:
df_correction = kwds.get('df_correction', None)
# TODO: check also use_correction, do I need all combinations?
if df_correction is not False: # i.e. in [None, True]:
# user didn't explicitely set it to False
adjust_df = True
res.cov_kwds['adjust_df'] = adjust_df
# verify and set kwds, and calculate cov
# TODO: this should be outsourced in a function so we can reuse it in
# other models
# TODO: make it DRYer repeated code for checking kwds
if cov_type in ['fixed scale', 'fixed_scale']:
res.cov_kwds['description'] = descriptions['fixed_scale']
res.cov_kwds['scale'] = scale = kwds.get('scale', 1.)
res.cov_params_default = scale * res.normalized_cov_params
elif cov_type.upper() in ('HC0', 'HC1', 'HC2', 'HC3'):
if kwds:
raise ValueError('heteroscedasticity robust covariance '
'does not use keywords')
res.cov_kwds['description'] = descriptions[cov_type.upper()]
# TODO cannot access cov without calling se first
getattr(self, cov_type.upper() + '_se')
res.cov_params_default = getattr(self, 'cov_' + cov_type.upper())
elif cov_type.lower() == 'hac':
maxlags = kwds['maxlags'] # required?, default in cov_hac_simple
res.cov_kwds['maxlags'] = maxlags
weights_func = kwds.get('weights_func', sw.weights_bartlett)
res.cov_kwds['weights_func'] = weights_func
use_correction = kwds.get('use_correction', False)
res.cov_kwds['use_correction'] = use_correction
res.cov_kwds['description'] = descriptions['HAC'].format(
maxlags=maxlags,
correction=['without', 'with'][use_correction])
res.cov_params_default = sw.cov_hac_simple(
self, nlags=maxlags, weights_func=weights_func,
use_correction=use_correction)
elif cov_type.lower() == 'cluster':
# cluster robust standard errors, one- or two-way
groups = kwds['groups']
if not hasattr(groups, 'shape'):
groups = np.asarray(groups).T
if groups.ndim >= 2:
groups = groups.squeeze()
res.cov_kwds['groups'] = groups
use_correction = kwds.get('use_correction', True)
res.cov_kwds['use_correction'] = use_correction
if groups.ndim == 1:
if adjust_df:
# need to find number of groups
# duplicate work
self.n_groups = n_groups = len(np.unique(groups))
res.cov_params_default = sw.cov_cluster(
self, groups, use_correction=use_correction)
elif groups.ndim == 2:
if hasattr(groups, 'values'):
groups = groups.values
if adjust_df:
# need to find number of groups
# duplicate work
n_groups0 = len(np.unique(groups[:, 0]))
n_groups1 = len(np.unique(groups[:, 1]))
self.n_groups = (n_groups0, n_groups1)
n_groups = min(n_groups0, n_groups1) # use for adjust_df
# Note: sw.cov_cluster_2groups has 3 returns
res.cov_params_default = sw.cov_cluster_2groups(
self, groups, use_correction=use_correction)[0]
else:
raise ValueError('only two groups are supported')
res.cov_kwds['description'] = descriptions['cluster']
elif cov_type.lower() == 'hac-panel':
# cluster robust standard errors
res.cov_kwds['time'] = time = kwds.get('time', None)
res.cov_kwds['groups'] = groups = kwds.get('groups', None)
# TODO: nlags is currently required
# nlags = kwds.get('nlags', True)
# res.cov_kwds['nlags'] = nlags
# TODO: `nlags` or `maxlags`
res.cov_kwds['maxlags'] = maxlags = kwds['maxlags']
use_correction = kwds.get('use_correction', 'hac')
res.cov_kwds['use_correction'] = use_correction
weights_func = kwds.get('weights_func', sw.weights_bartlett)
res.cov_kwds['weights_func'] = weights_func
if groups is not None:
groups = np.asarray(groups)
tt = (np.nonzero(groups[:-1] != groups[1:])[0] + 1).tolist()
nobs_ = len(groups)
elif time is not None:
time = np.asarray(time)
# TODO: clumsy time index in cov_nw_panel
tt = (np.nonzero(time[1:] < time[:-1])[0] + 1).tolist()
nobs_ = len(time)
else:
raise ValueError('either time or groups needs to be given')
groupidx = lzip([0] + tt, tt + [nobs_])
self.n_groups = n_groups = len(groupidx)
res.cov_params_default = sw.cov_nw_panel(self, maxlags, groupidx,
weights_func=weights_func,
use_correction=use_correction)
res.cov_kwds['description'] = descriptions['HAC-Panel']
elif cov_type.lower() == 'hac-groupsum':
# Driscoll-Kraay standard errors
res.cov_kwds['time'] = time = kwds['time']
# TODO: nlags is currently required
# nlags = kwds.get('nlags', True)
# res.cov_kwds['nlags'] = nlags
# TODO: `nlags` or `maxlags`
res.cov_kwds['maxlags'] = maxlags = kwds['maxlags']
use_correction = kwds.get('use_correction', 'cluster')
res.cov_kwds['use_correction'] = use_correction
weights_func = kwds.get('weights_func', sw.weights_bartlett)
res.cov_kwds['weights_func'] = weights_func
if adjust_df:
# need to find number of groups
tt = (np.nonzero(time[1:] < time[:-1])[0] + 1)
self.n_groups = n_groups = len(tt) + 1
res.cov_params_default = sw.cov_nw_groupsum(
self, maxlags, time, weights_func=weights_func,
use_correction=use_correction)
res.cov_kwds['description'] = descriptions['HAC-Groupsum']
else:
raise ValueError('cov_type not recognized. See docstring for ' +
'available options and spelling')
if adjust_df:
# Note: df_resid is used for scale and others, add new attribute
res.df_resid_inference = n_groups - 1
return res
[docs] def get_prediction(self, exog=None, transform=True, weights=None,
row_labels=None, **kwds):
return pred.get_prediction(
self, exog=exog, transform=transform, weights=weights,
row_labels=row_labels, **kwds)
get_prediction.__doc__ = pred.get_prediction.__doc__
[docs] def summary(self, yname=None, xname=None, title=None, alpha=.05):
"""Summarize the Regression Results
Parameters
----------
yname : string, optional
Default is `y`
xname : list of strings, optional
Default is `var_##` for ## in p the number of regressors
title : string, optional
Title for the top table. If not None, then this replaces the
default title
alpha : float
significance level for the confidence intervals
Returns
-------
smry : Summary instance
this holds the summary tables and text, which can be printed or
converted to various output formats.
See Also
--------
statsmodels.iolib.summary.Summary : class to hold summary results
"""
from statsmodels.stats.stattools import (
jarque_bera, omni_normtest, durbin_watson)
jb, jbpv, skew, kurtosis = jarque_bera(self.wresid)
omni, omnipv = omni_normtest(self.wresid)
eigvals = self.eigenvals
condno = self.condition_number
# TODO: Avoid adding attributes in non-__init__
self.diagn = dict(jb=jb, jbpv=jbpv, skew=skew, kurtosis=kurtosis,
omni=omni, omnipv=omnipv, condno=condno,
mineigval=eigvals[-1])
# TODO not used yet
# diagn_left_header = ['Models stats']
# diagn_right_header = ['Residual stats']
# TODO: requiring list/iterable is a bit annoying
# need more control over formatting
# TODO: default don't work if it's not identically spelled
top_left = [('Dep. Variable:', None),
('Model:', None),
('Method:', ['Least Squares']),
('Date:', None),
('Time:', None),
('No. Observations:', None),
('Df Residuals:', None),
('Df Model:', None),
]
if hasattr(self, 'cov_type'):
top_left.append(('Covariance Type:', [self.cov_type]))
rsquared_type = '' if self.k_constant else ' (uncentered)'
top_right = [('R-squared' + rsquared_type + ':', ["%#8.3f" % self.rsquared]),
('Adj. R-squared' + rsquared_type + ':', ["%#8.3f" % self.rsquared_adj]),
('F-statistic:', ["%#8.4g" % self.fvalue]),
('Prob (F-statistic):', ["%#6.3g" % self.f_pvalue]),
('Log-Likelihood:', None),
('AIC:', ["%#8.4g" % self.aic]),
('BIC:', ["%#8.4g" % self.bic])
]
diagn_left = [('Omnibus:', ["%#6.3f" % omni]),
('Prob(Omnibus):', ["%#6.3f" % omnipv]),
('Skew:', ["%#6.3f" % skew]),
('Kurtosis:', ["%#6.3f" % kurtosis])
]
diagn_right = [('Durbin-Watson:',
["%#8.3f" % durbin_watson(self.wresid)]
),
('Jarque-Bera (JB):', ["%#8.3f" % jb]),
('Prob(JB):', ["%#8.3g" % jbpv]),
('Cond. No.', ["%#8.3g" % condno])
]
if title is None:
title = self.model.__class__.__name__ + ' ' + "Regression Results"
# create summary table instance
from statsmodels.iolib.summary import Summary
smry = Summary()
smry.add_table_2cols(self, gleft=top_left, gright=top_right,
yname=yname, xname=xname, title=title)
smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
use_t=self.use_t)
smry.add_table_2cols(self, gleft=diagn_left, gright=diagn_right,
yname=yname, xname=xname,
title="")
# add warnings/notes, added to text format only
etext = []
if hasattr(self, 'cov_type'):
etext.append(self.cov_kwds['description'])
if self.model.exog.shape[0] < self.model.exog.shape[1]:
wstr = "The input rank is higher than the number of observations."
etext.append(wstr)
if eigvals[-1] < 1e-10:
wstr = "The smallest eigenvalue is %6.3g. This might indicate "
wstr += "that there are\n"
wstr += "strong multicollinearity problems or that the design "
wstr += "matrix is singular."
wstr = wstr % eigvals[-1]
etext.append(wstr)
elif condno > 1000: # TODO: what is recommended?
wstr = "The condition number is large, %6.3g. This might "
wstr += "indicate that there are\n"
wstr += "strong multicollinearity or other numerical "
wstr += "problems."
wstr = wstr % condno
etext.append(wstr)
if etext:
etext = ["[{0}] {1}".format(i + 1, text)
for i, text in enumerate(etext)]
etext.insert(0, "Warnings:")
smry.add_extra_txt(etext)
return smry
[docs] def summary2(self, yname=None, xname=None, title=None, alpha=.05,
float_format="%.4f"):
"""Experimental summary function to summarize the regression results
Parameters
----------
xname : List of strings of length equal to the number of parameters
Names of the independent variables (optional)
yname : string
Name of the dependent variable (optional)
title : string, optional
Title for the top table. If not None, then this replaces the
default title
alpha : float
significance level for the confidence intervals
float_format: string
print format for floats in parameters summary
Returns
-------
smry : Summary instance
this holds the summary tables and text, which can be printed or
converted to various output formats.
See Also
--------
statsmodels.iolib.summary2.Summary : class to hold summary results
"""
# Diagnostics
from statsmodels.stats.stattools import (jarque_bera,
omni_normtest,
durbin_watson)
from collections import OrderedDict
jb, jbpv, skew, kurtosis = jarque_bera(self.wresid)
omni, omnipv = omni_normtest(self.wresid)
dw = durbin_watson(self.wresid)
eigvals = self.eigenvals
condno = self.condition_number
eigvals = np.sort(eigvals) # in increasing order
diagnostic = OrderedDict([
('Omnibus:', "%.3f" % omni),
('Prob(Omnibus):', "%.3f" % omnipv),
('Skew:', "%.3f" % skew),
('Kurtosis:', "%.3f" % kurtosis),
('Durbin-Watson:', "%.3f" % dw),
('Jarque-Bera (JB):', "%.3f" % jb),
('Prob(JB):', "%.3f" % jbpv),
('Condition No.:', "%.0f" % condno)
])
# Summary
from statsmodels.iolib import summary2
smry = summary2.Summary()
smry.add_base(results=self, alpha=alpha, float_format=float_format,
xname=xname, yname=yname, title=title)
smry.add_dict(diagnostic)
# Warnings
if eigvals[-1] < 1e-10:
warn = "The smallest eigenvalue is %6.3g. This might indicate that\
there are strong multicollinearity problems or that the design\
matrix is singular." % eigvals[-1]
smry.add_text(warn)
if condno > 1000:
warn = "* The condition number is large (%.g). This might indicate \
strong multicollinearity or other numerical problems." % condno
smry.add_text(warn)
return smry
[docs]class OLSResults(RegressionResults):
"""
Results class for for an OLS model.
Most of the methods and attributes are inherited from RegressionResults.
The special methods that are only available for OLS are:
- get_influence
- outlier_test
- el_test
- conf_int_el
See Also
--------
RegressionResults
"""
[docs] def get_influence(self):
"""
get an instance of Influence with influence and outlier measures
Returns
-------
infl : Influence instance
the instance has methods to calculate the main influence and
outlier measures for the OLS regression
See Also
--------
statsmodels.stats.outliers_influence.OLSInfluence
"""
from statsmodels.stats.outliers_influence import OLSInfluence
return OLSInfluence(self)
[docs] def outlier_test(self, method='bonf', alpha=.05, labels=None,
order=False, cutoff=None):
"""
Test observations for outliers according to method
Parameters
----------
method : str
- `bonferroni` : one-step correction
- `sidak` : one-step correction
- `holm-sidak` :
- `holm` :
- `simes-hochberg` :
- `hommel` :
- `fdr_bh` : Benjamini/Hochberg
- `fdr_by` : Benjamini/Yekutieli
See `statsmodels.stats.multitest.multipletests` for details.
alpha : float
familywise error rate
labels : None or array_like
If `labels` is not None, then it will be used as index to the
returned pandas DataFrame. See also Returns below
order : bool
Whether or not to order the results by the absolute value of the
studentized residuals. If labels are provided they will also be sorted.
cutoff : None or float in [0, 1]
If cutoff is not None, then the return only includes observations with
multiple testing corrected p-values strictly below the cutoff. The
returned array or dataframe can be empty if t
Returns
-------
table : ndarray or DataFrame
Returns either an ndarray or a DataFrame if labels is not None.
Will attempt to get labels from model_results if available. The
columns are the Studentized residuals, the unadjusted p-value,
and the corrected p-value according to method.
Notes
-----
The unadjusted p-value is stats.t.sf(abs(resid), df) where
df = df_resid - 1.
"""
from statsmodels.stats.outliers_influence import outlier_test
return outlier_test(self, method, alpha, labels=labels,
order=order, cutoff=cutoff)
[docs] def el_test(self, b0_vals, param_nums, return_weights=0,
ret_params=0, method='nm',
stochastic_exog=1, return_params=0):
"""
Tests single or joint hypotheses of the regression parameters using
Empirical Likelihood.
Parameters
----------
b0_vals : 1darray
The hypothesized value of the parameter to be tested
param_nums : 1darray
The parameter number to be tested
print_weights : bool
If true, returns the weights that optimize the likelihood
ratio at b0_vals. Default is False
ret_params : bool
If true, returns the parameter vector that maximizes the likelihood
ratio at b0_vals. Also returns the weights. Default is False
method : string
Can either be 'nm' for Nelder-Mead or 'powell' for Powell. The
optimization method that optimizes over nuisance parameters.
Default is 'nm'
stochastic_exog : bool
When TRUE, the exogenous variables are assumed to be stochastic.
When the regressors are nonstochastic, moment conditions are
placed on the exogenous variables. Confidence intervals for
stochastic regressors are at least as large as non-stochastic
regressors. Default = TRUE
Returns
-------
res : tuple
The p-value and -2 times the log-likelihood ratio for the
hypothesized values.
Examples
--------
>>> import statsmodels.api as sm
>>> data = sm.datasets.stackloss.load(as_pandas=False)
>>> endog = data.endog
>>> exog = sm.add_constant(data.exog)
>>> model = sm.OLS(endog, exog)
>>> fitted = model.fit()
>>> fitted.params
>>> array([-39.91967442, 0.7156402 , 1.29528612, -0.15212252])
>>> fitted.rsquared
>>> 0.91357690446068196
>>> # Test that the slope on the first variable is 0
>>> fitted.el_test([0], [1])
>>> (27.248146353888796, 1.7894660442330235e-07)
"""
params = np.copy(self.params)
opt_fun_inst = _ELRegOpts() # to store weights
if len(param_nums) == len(params):
llr = opt_fun_inst._opt_nuis_regress(
[],
param_nums=param_nums,
endog=self.model.endog,
exog=self.model.exog,
nobs=self.model.nobs,
nvar=self.model.exog.shape[1],
params=params,
b0_vals=b0_vals,
stochastic_exog=stochastic_exog)
pval = 1 - stats.chi2.cdf(llr, len(param_nums))
if return_weights:
return llr, pval, opt_fun_inst.new_weights
else:
return llr, pval
x0 = np.delete(params, param_nums)
args = (param_nums, self.model.endog, self.model.exog,
self.model.nobs, self.model.exog.shape[1], params,
b0_vals, stochastic_exog)
if method == 'nm':
llr = optimize.fmin(opt_fun_inst._opt_nuis_regress, x0,
maxfun=10000, maxiter=10000, full_output=1,
disp=0, args=args)[1]
if method == 'powell':
llr = optimize.fmin_powell(opt_fun_inst._opt_nuis_regress, x0,
full_output=1, disp=0,
args=args)[1]
pval = 1 - stats.chi2.cdf(llr, len(param_nums))
if ret_params:
return llr, pval, opt_fun_inst.new_weights, opt_fun_inst.new_params
elif return_weights:
return llr, pval, opt_fun_inst.new_weights
else:
return llr, pval
[docs] def conf_int_el(self, param_num, sig=.05, upper_bound=None,
lower_bound=None, method='nm', stochastic_exog=1):
"""
Computes the confidence interval for the parameter given by param_num
using Empirical Likelihood
Parameters
----------
param_num : float
The parameter for which the confidence interval is desired
sig : float
The significance level. Default is .05
upper_bound : float
The maximum value the upper limit can be. Default is the
99.9% confidence value under OLS assumptions.
lower_bound : float
The minimum value the lower limit can be. Default is the 99.9%
confidence value under OLS assumptions.
method : string
Can either be 'nm' for Nelder-Mead or 'powell' for Powell. The
optimization method that optimizes over nuisance parameters.
Default is 'nm'
Returns
-------
ci : tuple
The confidence interval
See Also
--------
el_test
Notes
-----
This function uses brentq to find the value of beta where
test_beta([beta], param_num)[1] is equal to the critical
value.
The function returns the results of each iteration of brentq at
each value of beta.
The current function value of the last printed optimization
should be the critical value at the desired significance level.
For alpha=.05, the value is 3.841459.
To ensure optimization terminated successfully, it is suggested to
do el_test([lower_limit], [param_num])
If the optimization does not terminate successfully, consider switching
optimization algorithms.
If optimization is still not successful, try changing the values of
start_int_params. If the current function value repeatedly jumps
from a number between 0 and the critical value and a very large number
(>50), the starting parameters of the interior minimization need
to be changed.
"""
r0 = stats.chi2.ppf(1 - sig, 1)
if upper_bound is None:
upper_bound = self.conf_int(.01)[param_num][1]
if lower_bound is None:
lower_bound = self.conf_int(.01)[param_num][0]
f = lambda b0: self.el_test(np.array([b0]), np.array([param_num]),
method=method,
stochastic_exog=stochastic_exog)[0]-r0
lowerl = optimize.brenth(f, lower_bound,
self.params[param_num])
upperl = optimize.brenth(f, self.params[param_num],
upper_bound)
# ^ Seems to be faster than brentq in most cases
return (lowerl, upperl)
class RegressionResultsWrapper(wrap.ResultsWrapper):
_attrs = {
'chisq': 'columns',
'sresid': 'rows',
'weights': 'rows',
'wresid': 'rows',
'bcov_unscaled': 'cov',
'bcov_scaled': 'cov',
'HC0_se': 'columns',
'HC1_se': 'columns',
'HC2_se': 'columns',
'HC3_se': 'columns',
'norm_resid': 'rows',
}
_wrap_attrs = wrap.union_dicts(base.LikelihoodResultsWrapper._attrs,
_attrs)
_methods = {}
_wrap_methods = wrap.union_dicts(
base.LikelihoodResultsWrapper._wrap_methods,
_methods)
wrap.populate_wrapper(RegressionResultsWrapper,
RegressionResults)