Source code for statsmodels.distributions.copula.copulas

"""

Which Archimedean is Best?
Extreme Value copulas formulas are based on Genest 2009

References
----------

Genest, C., 2009. Rank-based inference for bivariate extreme-value
copulas. The Annals of Statistics, 37(5), pp.2990-3022.

"""
from abc import ABC, abstractmethod

import numpy as np
from scipy import stats

from statsmodels.graphics import utils


[docs]class CopulaDistribution: """Multivariate copula distribution Parameters ---------- copula : str, instance of copula class String name or instance of a copula class marginals : list of distribution instances Marginal distributions. copargs : tuple Parameters for copula Notes ----- Status: experimental, argument handling may still change """ def __init__(self, copula, marginals, cop_args=()): self.copula = copula # no checking done on marginals self.marginals = marginals self.cop_args = cop_args self.k_vars = len(marginals)
[docs] def rvs(self, nobs=1, cop_args=None, marg_args=None, random_state=None): """Draw `n` in the half-open interval ``[0, 1)``. Sample the joint distribution. Parameters ---------- nobs : int, optional Number of samples to generate in the parameter space. Default is 1. cop_args : tuple Copula parameters. If None, then the copula parameters will be taken from the ``cop_args`` attribute created when initiializing the instance. marg_args : list of tuples Parameters for the marginal distributions. It can be None if none of the marginal distributions have parameters, otherwise it needs to be a list of tuples with the same length has the number of marginal distributions. The list can contain empty tuples for marginal distributions that do not take parameter arguments. random_state : {None, int, `numpy.random.Generator`}, optional If `seed` is None then the legacy singleton NumPy generator. This will change after 0.13 to use a fresh NumPy ``Generator``, so you should explicitly pass a seeded ``Generator`` if you need reproducible results. If `seed` is an int, a new ``Generator`` instance is used, seeded with `seed`. If `seed` is already a ``Generator`` instance then that instance is used. Returns ------- sample : array_like (n, d) Sample from the joint distribution. Notes ----- The random samples are generated by creating a sample with uniform margins from the copula, and using ``ppf`` to convert uniform margins to the one specified by the marginal distribution. See Also -------- statsmodels.tools.rng_qrng.check_random_state """ if cop_args is None: cop_args = self.cop_args if marg_args is None: marg_args = [()] * self.k_vars sample = self.copula.rvs(nobs=nobs, args=cop_args, random_state=random_state) for i, dist in enumerate(self.marginals): sample[:, i] = dist.ppf(0.5 + (1 - 1e-10) * (sample[:, i] - 0.5), *marg_args[i]) return sample
[docs] def cdf(self, y, cop_args=None, marg_args=None): """CDF of copula distribution. Parameters ---------- y : array_like Values of random variable at which to evaluate cdf. If 2-dimensional, then components of multivariate random variable need to be in columns cop_args : tuple Copula parameters. If None, then the copula parameters will be taken from the ``cop_args`` attribute created when initiializing the instance. marg_args : list of tuples Parameters for the marginal distributions. It can be None if none of the marginal distributions have parameters, otherwise it needs to be a list of tuples with the same length has the number of marginal distributions. The list can contain empty tuples for marginal distributions that do not take parameter arguments. Returns ------- cdf values """ y = np.asarray(y) if cop_args is None: cop_args = self.cop_args if marg_args is None: marg_args = [()] * y.shape[-1] cdf_marg = [] for i in range(self.k_vars): cdf_marg.append(self.marginals[i].cdf(y[..., i], *marg_args[i])) u = np.column_stack(cdf_marg) if y.ndim == 1: u = u.squeeze() return self.copula.cdf(u, cop_args)
[docs] def pdf(self, y, cop_args=None, marg_args=None): """PDF of copula distribution. Parameters ---------- y : array_like Values of random variable at which to evaluate cdf. If 2-dimensional, then components of multivariate random variable need to be in columns cop_args : tuple Copula parameters. If None, then the copula parameters will be taken from the ``cop_args`` attribute created when initiializing the instance. marg_args : list of tuples Parameters for the marginal distributions. It can be None if none of the marginal distributions have parameters, otherwise it needs to be a list of tuples with the same length has the number of marginal distributions. The list can contain empty tuples for marginal distributions that do not take parameter arguments. Returns ------- pdf values """ return np.exp(self.logpdf(y, cop_args=cop_args, marg_args=marg_args))
[docs] def logpdf(self, y, cop_args=None, marg_args=None): """Log-pdf of copula distribution. Parameters ---------- y : array_like Values of random variable at which to evaluate cdf. If 2-dimensional, then components of multivariate random variable need to be in columns cop_args : tuple Copula parameters. If None, then the copula parameters will be taken from the ``cop_args`` attribute creating when initiializing the instance. marg_args : list of tuples Parameters for the marginal distributions. It can be None if none of the marginal distributions have parameters, otherwise it needs to be a list of tuples with the same length has the number of marginal distributions. The list can contain empty tuples for marginal distributions that do not take parameter arguments. Returns ------- log-pdf values """ y = np.asarray(y) if cop_args is None: cop_args = self.cop_args if marg_args is None: marg_args = tuple([()] * y.shape[-1]) lpdf = 0.0 cdf_marg = [] for i in range(self.k_vars): lpdf += self.marginals[i].logpdf(y[..., i], *marg_args[i]) cdf_marg.append(self.marginals[i].cdf(y[..., i], *marg_args[i])) u = np.column_stack(cdf_marg) if y.ndim == 1: u = u.squeeze() lpdf += self.copula.logpdf(u, cop_args) return lpdf
class Copula(ABC): r"""A generic Copula class meant for subclassing. Notes ----- A function :math:`\phi` on :math:`[0, \infty]` is the Laplace-Stieltjes transform of a distribution function if and only if :math:`\phi` is completely monotone and :math:`\phi(0) = 1` [2]_. The following algorithm for sampling a ``d``-dimensional exchangeable Archimedean copula with generator :math:`\phi` is due to Marshall, Olkin (1988) [1]_, where :math:`LS^{−1}(\phi)` denotes the inverse Laplace-Stieltjes transform of :math:`\phi`. From a mixture representation with respect to :math:`F`, the following algorithm may be derived for sampling Archimedean copulas, see [1]_. 1. Sample :math:`V \sim F = LS^{−1}(\phi)`. 2. Sample i.i.d. :math:`X_i \sim U[0,1], i \in \{1,...,d\}`. 3. Return:math:`(U_1,..., U_d)`, where :math:`U_i = \phi(−\log(X_i)/V), i \in \{1, ...,d\}`. Detailed properties of each copula can be found in [3]_. Instances of the class can access the attributes: ``rng`` for the random number generator (used for the ``seed``). **Subclassing** When subclassing `Copula` to create a new copula, ``__init__`` and ``random`` must be redefined. * ``__init__(theta)``: If the copula does not take advantage of a ``theta``, this parameter can be omitted. * ``random(n, random_state)``: draw ``n`` from the copula. * ``pdf(x)``: PDF from the copula. * ``cdf(x)``: CDF from the copula. References ---------- .. [1] Marshall AW, Olkin I. “Families of Multivariate Distributions”, Journal of the American Statistical Association, 83, 834–841, 1988. .. [2] Marius Hofert. "Sampling Archimedean copulas", Universität Ulm, 2008. .. rvs[3] Harry Joe. "Dependence Modeling with Copulas", Monographs on Statistics and Applied Probability 134, 2015. """ def __init__(self, k_dim=2): self.k_dim = k_dim if k_dim > 2: import warnings warnings.warn("copulas for more than 2 dimension is untested") def rvs(self, nobs=1, args=(), random_state=None): """Draw `n` in the half-open interval ``[0, 1)``. Marginals are uniformly distributed. Parameters ---------- nobs : int, optional Number of samples to generate from the copula. Default is 1. args : tuple Arguments for copula parameters. The number of arguments depends on the copula. random_state : {None, int, `numpy.random.Generator`}, optional If `seed` is None then the legacy singleton NumPy generator. This will change after 0.13 to use a fresh NumPy ``Generator``, so you should explicitly pass a seeded ``Generator`` if you need reproducible results. If `seed` is an int, a new ``Generator`` instance is used, seeded with `seed`. If `seed` is already a ``Generator`` instance then that instance is used. Returns ------- sample : array_like (nobs, d) Sample from the copula. See Also -------- statsmodels.tools.rng_qrng.check_random_state """ raise NotImplementedError @abstractmethod def pdf(self, u, args=()): """Probability density function of copula. Parameters ---------- u : array_like, 2-D Points of random variables in unit hypercube at which method is evaluated. The second (or last) dimension should be the same as the dimension of the random variable, e.g. 2 for bivariate copula. args : tuple Arguments for copula parameters. The number of arguments depends on the copula. Returns ------- pdf : ndarray, (nobs, k_dim) Copula pdf evaluated at points ``u``. """ def logpdf(self, u, args=()): """Log of copula pdf, loglikelihood. Parameters ---------- u : array_like, 2-D Points of random variables in unit hypercube at which method is evaluated. The second (or last) dimension should be the same as the dimension of the random variable, e.g. 2 for bivariate copula. args : tuple Arguments for copula parameters. The number of arguments depends on the copula. Returns ------- cdf : ndarray, (nobs, k_dim) Copula log-pdf evaluated at points ``u``. """ return np.log(self.pdf(u, *args)) @abstractmethod def cdf(self, u, args=()): """Cumulative distribution function evaluated at points u. Parameters ---------- u : array_like, 2-D Points of random variables in unit hypercube at which method is evaluated. The second (or last) dimension should be the same as the dimension of the random variable, e.g. 2 for bivariate copula. args : tuple Arguments for copula parameters. The number of arguments depends on the copula. Returns ------- cdf : ndarray, (nobs, k_dim) Copula cdf evaluated at points ``u``. """ def plot_scatter(self, sample=None, nobs=500, random_state=None, ax=None): """Sample the copula and plot. Parameters ---------- sample : array-like, optional The sample to plot. If not provided (the default), a sample is generated. nobs : int, optional Number of samples to generate from the copula. random_state : {None, int, `numpy.random.Generator`}, optional If `seed` is None then the legacy singleton NumPy generator. This will change after 0.13 to use a fresh NumPy ``Generator``, so you should explicitly pass a seeded ``Generator`` if you need reproducible results. If `seed` is an int, a new ``Generator`` instance is used, seeded with `seed`. If `seed` is already a ``Generator`` instance then that instance is used. ax : AxesSubplot, optional If given, this subplot is used to plot in instead of a new figure being created. Returns ------- fig : Figure If `ax` is None, the created figure. Otherwise the figure to which `ax` is connected. sample : array_like (n, d) Sample from the copula. See Also -------- statsmodels.tools.rng_qrng.check_random_state """ if self.k_dim != 2: raise ValueError("Can only plot 2-dimensional Copula.") if sample is None: sample = self.rvs(nobs=nobs, random_state=random_state) fig, ax = utils.create_mpl_ax(ax) ax.scatter(sample[:, 0], sample[:, 1]) ax.set_xlabel('u') ax.set_ylabel('v') return fig, sample def plot_pdf(self, ticks_nbr=10, ax=None): """Plot the PDF. Parameters ---------- ticks_nbr : int, optional Number of color isolines for the PDF. Default is 10. ax : AxesSubplot, optional If given, this subplot is used to plot in instead of a new figure being created. Returns ------- fig : Figure If `ax` is None, the created figure. Otherwise the figure to which `ax` is connected. """ from matplotlib import pyplot as plt if self.k_dim != 2: import warnings warnings.warn("Plotting 2-dimensional Copula.") n_samples = 100 eps = 1e-4 uu, vv = np.meshgrid(np.linspace(eps, 1 - eps, n_samples), np.linspace(eps, 1 - eps, n_samples)) points = np.vstack([uu.ravel(), vv.ravel()]).T data = self.pdf(points).T.reshape(uu.shape) min_ = np.nanpercentile(data, 5) max_ = np.nanpercentile(data, 95) fig, ax = utils.create_mpl_ax(ax) vticks = np.linspace(min_, max_, num=ticks_nbr) range_cbar = [min_, max_] cs = ax.contourf(uu, vv, data, vticks, antialiased=True, vmin=range_cbar[0], vmax=range_cbar[1]) ax.set_xlabel("u") ax.set_ylabel("v") ax.set_xlim(0, 1) ax.set_ylim(0, 1) ax.set_aspect('equal') cbar = plt.colorbar(cs, ticks=vticks) cbar.set_label('p') fig.tight_layout() return fig def tau_simulated(self, nobs=1024, random_state=None): """Kendall's tau based on simulated samples. Returns ------- tau : float Kendall's tau. """ x = self.rvs(nobs, random_state=random_state) return stats.kendalltau(x[:, 0], x[:, 1])[0] def fit_corr_param(self, data): """Copula correlation parameter using Kendall's tau of sample data. Parameters ---------- data : array_like Sample data used to fit `theta` using Kendall's tau. Returns ------- corr_param : float Correlation parameter of the copula, ``theta`` in Archimedean and pearson correlation in elliptical. """ x = np.asarray(data) if x.shape[1] != 2: import warnings warnings.warn("currently only first pair of data are used" " to compute kendall's tau") tau = stats.kendalltau(x[:, 0], x[:, 1])[0] return self._arg_from_tau(tau) def _arg_from_tau(self, tau): """Compute correlation parameter from tau. Parameters ---------- tau : float Kendall's tau. Returns ------- corr_param : float Correlation parameter of the copula, ``theta`` in Archimedean and pearson correlation in elliptical. """ raise NotImplementedError