"""
Various extensions to distributions
* skew normal and skew t distribution by Azzalini, A. & Capitanio, A.
* Gram-Charlier expansion distribution (using 4 moments),
* distributions based on non-linear transformation
- Transf_gen
- ExpTransf_gen, LogTransf_gen
- TransfTwo_gen
(defines as examples: square, negative square and abs transformations)
- these versions are without __new__
* mnvormcdf, mvstdnormcdf : cdf, rectangular integral for multivariate normal
distribution
TODO:
* Where is Transf_gen for general monotonic transformation ? found and added it
* write some docstrings, some parts I do not remember
* add Box-Cox transformation, parametrized ?
this is only partially cleaned, still includes test examples as functions
main changes
* add transf_gen (2010-05-09)
* added separate example and tests (2010-05-09)
* collect transformation function into classes
Example
-------
>>> logtg = Transf_gen(stats.t, np.exp, np.log,
numargs = 1, a=0, name = 'lnnorm',
longname = 'Exp transformed normal',
# extradoc = '\ndistribution of y = exp(x), with x standard normal'
'precision for moment and stats is not very high, 2-3 decimals')
>>> logtg.cdf(5, 6)
0.92067704211191848
>>> stats.t.cdf(np.log(5), 6)
0.92067704211191848
>>> logtg.pdf(5, 6)
0.021798547904239293
>>> stats.t.pdf(np.log(5), 6)
0.10899273954837908
>>> stats.t.pdf(np.log(5), 6)/5. #derivative
0.021798547909675815
Author: josef-pktd
License: BSD
"""
import numpy as np
from numpy import exp, poly1d, sqrt
import scipy
from scipy import special, stats
from scipy.stats import distributions
from statsmodels.stats.moment_helpers import mc2mvsk, mvsk2mc
try:
from scipy.stats._mvn import mvndst
except ImportError:
# Must be using SciPy <1.8.0 where this function was moved (it's not a
# public SciPy function, but we need it here)
from scipy.stats.mvn import mvndst
# note copied from distr_skewnorm_0.py
class SkewNorm_gen(distributions.rv_continuous):
    """Univariate skew-normal distribution of Azzalini.

    The class follows the ``scipy.stats.distributions`` pattern, but the
    distribution name and the single shape parameter ``alpha`` are fixed in
    ``__init__`` instead of being supplied by the caller.

    Notes
    -----
    The density is ``2 * normpdf(x) * normcdf(alpha * x)``.  No restriction
    is placed on ``alpha``: ``_argcheck`` accepts every value.
    """

    def __init__(self):
        distributions.rv_continuous.__init__(
            self,
            name="Skew Normal distribution",
            shapes="alpha",
        )

    def _argcheck(self, alpha):
        # every alpha is accepted; the (alpha >= 0) check is left disabled
        return 1

    def _rvs(self, alpha, size=None, random_state=None):
        """Draw skew-normal variates.

        Uses the stochastic representation described at
        http://azzalini.stat.unipd.it/SN/faq.html

        Parameters
        ----------
        alpha : array_like
            Shape parameter.
        size : int or tuple of int, optional
            Output shape, as passed by ``rvs``.
        random_state : optional
            Random number generator or seed, forwarded to
            ``stats.norm.rvs``.
        """
        # ``rvs`` hands ``size`` and ``random_state`` over as keyword
        # arguments, so they have to be part of the signature.
        delta = alpha / np.sqrt(1 + alpha**2)
        u0 = stats.norm.rvs(size=size, random_state=random_state)
        v = stats.norm.rvs(size=size, random_state=random_state)
        u1 = delta * u0 + np.sqrt(1 - delta**2) * v
        # reflect the draws whose conditioning variable is not positive
        return np.where(u0 > 0, u1, -u1)

    def _munp(self, n, alpha):
        # Only _pdf is defined, so integrate the pdf with _mom0_sc; the
        # default stats calculation uses ppf, which is much slower.
        return self._mom0_sc(n, alpha)

    def _pdf(self, x, alpha):
        # 2 * normpdf(x) * normcdf(alpha * x)
        return (
            2.0
            / np.sqrt(2 * np.pi)
            * np.exp(-(x**2) / 2.0)
            * special.ndtr(alpha * x)
        )

    def _stats_skip(self, x, alpha, moments="mvsk"):
        # skipped for now to force moment integration as a check
        pass


skewnorm = SkewNorm_gen()
# generated the same way as distributions in stats.distributions
class SkewNorm2_gen(distributions.rv_continuous):
    """Univariate skew-normal distribution of Azzalini.

    Follows the ``scipy.stats.distributions`` pattern: name and shapes are
    supplied when the instance is created.

    Notes
    -----
    -inf < alpha < inf
    """

    def _argcheck(self, alpha):
        # all values of alpha are accepted
        return 1

    def _pdf(self, x, alpha):
        # density: 2 * normpdf(x) * normcdf(alpha * x)
        scaled_const = 2.0 / np.sqrt(2 * np.pi)
        gauss_kernel = np.exp(-(x**2) / 2.0)
        return scaled_const * gauss_kernel * special.ndtr(alpha * x)


skewnorm2 = SkewNorm2_gen(
    shapes="alpha",
    name="Skew Normal distribution",
)
class ACSkewT_gen(distributions.rv_continuous):
    """Univariate skew-t distribution of Azzalini and Capitanio.

    The class follows the ``scipy.stats.distributions`` pattern, but the
    distribution name and the shape parameters ``df, alpha`` are fixed in
    ``__init__``.

    Notes
    -----
    Skewed t distribution by Azzalini, A. & Capitanio, A. (2003).
    The pdf is given by::

        w = alpha * x * np.sqrt((1 + df) / (x**2 + df))
        pdf(x) = 2.0 * t.pdf(x, df) * t.cdf(w, df + 1)

    ``df`` has to be positive and ``alpha`` must not be NaN; no sign
    restriction on ``alpha`` is enforced by ``_argcheck``.

    Note: different from the skewed t distribution by Hansen 1999.

    References
    ----------
    Azzalini, A. & Capitanio, A. (2003), Distributions generated by
    perturbation of symmetry with emphasis on a multivariate skew-t
    distribution, J.Roy.Statist.Soc, series B, vol.65, pp.367-389
    """

    def __init__(self):
        distributions.rv_continuous.__init__(
            self,
            name="Skew T distribution",
            shapes="df, alpha",
        )

    def _argcheck(self, df, alpha):
        # (alpha == alpha) is False only for NaN
        return (alpha == alpha) * (df > 0)

    def _rvs(self, df, alpha, size=None, random_state=None):
        """Draw skew-t variates as skew-normal / sqrt(chi2 / df).

        See http://azzalini.stat.unipd.it/SN/faq.html

        Parameters
        ----------
        df, alpha : array_like
            Shape parameters.
        size : int or tuple of int, optional
            Output shape, as passed by ``rvs``.
        random_state : optional
            Random number generator or seed, forwarded to the underlying
            ``scipy.stats`` samplers.
        """
        # ``rvs`` hands ``size`` and ``random_state`` over as keyword
        # arguments, so they have to be part of the signature.
        # Skew-normal numerator, generated here directly so that sampling
        # does not depend on the state of another distribution instance.
        delta = alpha / np.sqrt(1 + alpha**2)
        u0 = stats.norm.rvs(size=size, random_state=random_state)
        v = stats.norm.rvs(size=size, random_state=random_state)
        u1 = delta * u0 + np.sqrt(1 - delta**2) * v
        z = np.where(u0 > 0, u1, -u1)
        V = stats.chi2.rvs(df, size=size, random_state=random_state)
        return z / np.sqrt(V / df)

    def _munp(self, n, df, alpha):
        # Only _pdf is defined, so integrate the pdf with _mom0_sc instead
        # of the default stats calculation, which uses ppf.
        return self._mom0_sc(n, df, alpha)

    def _pdf(self, x, df, alpha):
        # 2 * tpdf(x, df) * tcdf(alpha * x * sqrt((1 + df) / (x**2 + df)), df + 1)
        # special.stdtr takes the degrees of freedom as its first argument
        return (
            2.0
            * distributions.t._pdf(x, df)
            * special.stdtr(
                df + 1, alpha * x * np.sqrt((1 + df) / (x**2 + df))
            )
        )
##
##def mvsk2cm(*args):
## mu,sig,sk,kur = args
## # Get central moments
## cnt = [None]*4
## cnt[0] = mu
## cnt[1] = sig #*sig
## cnt[2] = sk * sig**1.5
## cnt[3] = (kur+3.0) * sig**2.0
## return cnt
##
##
##def mvsk2m(args):
## mc, mc2, skew, kurt = args#= self._stats(*args,**mdict)
## mnc = mc
## mnc2 = mc2 + mc*mc
## mc3 = skew*(mc2**1.5) # 3rd central moment
## mnc3 = mc3+3*mc*mc2+mc**3 # 3rd non-central moment
## mc4 = (kurt+3.0)*(mc2**2.0) # 4th central moment
## mnc4 = mc4+4*mc*mc3+6*mc*mc*mc2+mc**4
## return (mc, mc2, mc3, mc4), (mnc, mnc2, mnc3, mnc4)
##
##def mc2mvsk(args):
## mc, mc2, mc3, mc4 = args
## skew = mc3 / mc2**1.5
## kurt = mc4 / mc2**2.0 - 3.0
## return (mc, mc2, skew, kurt)
##
##def m2mc(args):
## mnc, mnc2, mnc3, mnc4 = args
## mc = mnc
## mc2 = mnc2 - mnc*mnc
## #mc3 = skew*(mc2**1.5) # 3rd central moment
## mc3 = mnc3 - (3*mc*mc2+mc**3) # 3rd central moment
## #mc4 = (kurt+3.0)*(mc2**2.0) # 4th central moment
## mc4 = mnc4 - (4*mc*mc3+6*mc*mc*mc2+mc**4)
## return (mc, mc2, mc3, mc4)
def _hermnorm(N):
# return the negatively normalized hermite polynomials up to order N-1
# (inclusive)
# using the recursive relationship
# p_n+1 = p_n(x)' - x*p_n(x)
# and p_0(x) = 1
plist = [None] * N
plist[0] = poly1d(1)
for n in range(1, N):
plist[n] = plist[n - 1].deriv() - poly1d([1, 0]) * plist[n - 1]
return plist
def pdf_moments_st(cnt):
    """Return the Gaussian expanded pdf function given the list of central
    moments (first one is mean).

    This is the version taken from scipy.stats; the scipy.stats version has
    a bug and returns the normal distribution.  The general coefficient
    loop for more than two moments was never made to work in this module
    (it stopped at an unconditional ``raise SystemError``), so the
    expansion is only available for exactly two moments here.  ``pdf_mvsk``
    and ``pdf_moments`` implement the four-moment case with explicit
    formulas.

    Parameters
    ----------
    cnt : sequence
        Central moments; ``cnt[0]`` is the mean and ``cnt[1]`` the variance.

    Returns
    -------
    thisfunc : function
        Evaluates the normal pdf with mean ``cnt[0]`` and variance
        ``cnt[1]`` at x.
    totp : poly1d
        Correction polynomial of the expansion; the constant 1 when only
        two moments are used.

    Raises
    ------
    ValueError
        If fewer than two moments are given.
    SystemError
        If more than two moments are given, because the higher order
        expansion is disabled.
    """
    N = len(cnt)
    if N < 2:
        raise ValueError(
            "At least two moments must be given to approximate the pdf."
        )
    if N > 2:
        # Fail explicitly and consistently instead of tripping over the
        # unfinished coefficient loop.
        raise SystemError(
            "the expansion for more than two moments is not implemented; "
            "use pdf_mvsk or pdf_moments"
        )

    mu = cnt[0]
    sig = sqrt(cnt[1])
    totp = poly1d(1)

    def thisfunc(x):
        xn = (x - mu) / sig
        return totp(xn) * exp(-xn * xn / 2.0) / sqrt(2 * np.pi) / sig

    return thisfunc, totp
def pdf_mvsk(mvsk):
    """Return the Gaussian expanded pdf function given the list of 1st, 2nd
    moment and skew and Fisher (excess) kurtosis.

    Parameters
    ----------
    mvsk : list of mu, mc2, skew, kurt
        distribution is matched to these four moments

    Returns
    -------
    pdffunc : function
        function that evaluates the pdf(x), where x is the non-standardized
        random variable.

    Notes
    -----
    Works only if four arguments are given; the expansion coefficients are
    computed with an explicit formula rather than a loop.

    This implements a Gram-Charlier expansion of the normal distribution
    where the first 2 moments coincide with those of the normal distribution
    but skew and kurtosis can deviate from it.

    In the Gram-Charlier distribution it is possible that the density
    becomes negative. This is the case when the deviation from the
    normal distribution is too large.

    References
    ----------
    https://en.wikipedia.org/wiki/Edgeworth_series
    Johnson N.L., S. Kotz, N. Balakrishnan: Continuous Univariate
    Distributions, Volume 1, 2nd ed., p.30
    """
    n_moments = len(mvsk)
    if n_moments < 4:
        raise ValueError(
            "Four moments must be given to " "approximate the pdf."
        )

    mu, mc2, skew, kurt = mvsk
    sig = sqrt(mc2)

    hermites = _hermnorm(n_moments + 1)
    coef3 = skew / 6.0
    coef4 = kurt / 24.0
    # The order 3 polynomial from _hermnorm carries a negative sign, hence
    # the subtraction of the skewness term.
    totp = poly1d(1) - coef3 * hermites[3] + coef4 * hermites[4]

    def pdffunc(x):
        xn = (x - mu) / sig
        corrected_kernel = totp(xn) * np.exp(-xn * xn / 2.0)
        return corrected_kernel / np.sqrt(2 * np.pi) / sig

    return pdffunc
def pdf_moments(cnt):
    """Return the Gaussian expanded pdf function given the list of central
    moments (first one is mean).

    Works only if four moments are given. Uses an explicit formula, not a
    loop.

    Parameters
    ----------
    cnt : sequence of mc, mc2, mc3, mc4
        Mean followed by the second, third and fourth central moment.

    Returns
    -------
    thisfunc : function
        Evaluates the expanded pdf at the non-standardized value x.

    Raises
    ------
    ValueError
        If ``cnt`` does not contain exactly four moments.

    Notes
    -----
    This implements a Gram-Charlier expansion of the normal distribution
    where the first 2 moments coincide with those of the normal distribution
    but skew and kurtosis can deviate from it.

    In the Gram-Charlier distribution it is possible that the density
    becomes negative. This is the case when the deviation from the
    normal distribution is too large.

    References
    ----------
    https://en.wikipedia.org/wiki/Edgeworth_series
    Johnson N.L., S. Kotz, N. Balakrishnan: Continuous Univariate
    Distributions, Volume 1, 2nd ed., p.30
    """
    N = len(cnt)
    if N != 4:
        # The four values are unpacked below; check up front so that the
        # caller gets a clear message instead of an unpacking error.
        raise ValueError(
            "Four moments must be given to approximate the pdf."
        )

    mc, mc2, mc3, mc4 = cnt
    skew = mc3 / mc2**1.5
    kurt = mc4 / mc2**2.0 - 3.0  # Fisher kurtosis, excess kurtosis
    mu = mc
    sig = sqrt(mc2)

    Dvals = _hermnorm(N + 1)
    C3 = skew / 6.0
    C4 = kurt / 24.0
    # the order 3 polynomial from _hermnorm is negative, hence the minus
    totp = poly1d(1) - C3 * Dvals[3] + C4 * Dvals[4]

    def thisfunc(x):
        xn = (x - mu) / sig
        return totp(xn) * np.exp(-xn * xn / 2.0) / np.sqrt(2 * np.pi) / sig

    return thisfunc
class NormExpan_gen(distributions.rv_continuous):
    """Gram-Charlier Expansion of Normal distribution

    The class follows the ``scipy.stats.distributions`` pattern, but with
    its own ``__init__``.

    Parameters
    ----------
    args : array_like
        Interpretation depends on ``mode``.
    mode : {'sample', 'mvsk', 'centmom'}, optional keyword
        * 'sample' (default): ``args`` is a data sample; mean, variance,
          skew and kurtosis are taken from ``stats.describe``.
        * 'mvsk': ``args`` is (mean, variance, skew, kurtosis).
        * 'centmom': ``args`` is the mean followed by the central moments
          2 to 4.

    Notes
    -----
    The distribution is defined as the Gram-Charlier expansion of
    the normal distribution using the first four moments. The pdf
    is given by

    pdf(x) = (1+ skew/6.0 * H(xc,3) + kurt/24.0 * H(xc,4))*normpdf(xc)

    where xc = (x-mu)/sig is the standardized value of the random variable
    and H(xc,3) and H(xc,4) are Hermite polynomials

    Note: This distribution has to be parametrized during
    initialization and instantiation, and does not have a shape
    parameter after instantiation (similar to frozen distribution
    except for location and scale.) Location and scale can be used
    as with other distributions, however note, that they are relative
    to the initialized distribution.
    """

    def __init__(self, args, **kwds):
        # todo: replace with super call
        distributions.rv_continuous.__init__(
            self,
            name="Normal Expansion distribution",
            shapes=" ",
        )
        mode = kwds.get("mode", "sample")

        if mode == "sample":
            # describe returns (nobs, minmax, mean, variance, skew, kurt)
            mu, var, sk, kur = stats.describe(args)[2:]
            self.mvsk = (mu, var, sk, kur)
            cnt = mvsk2mc((mu, var, sk, kur))
        elif mode == "mvsk":
            cnt = mvsk2mc(args)
            self.mvsk = args
        elif mode == "centmom":
            cnt = args
            self.mvsk = mc2mvsk(cnt)
        else:
            raise ValueError("mode must be 'sample', 'mvsk' or 'centmom'")

        self.cnt = cnt
        # the pdf is fixed by the moments chosen at construction
        self._pdf = pdf_mvsk(self.mvsk)

    def _munp(self, n):
        # Only _pdf is defined, so integrate the pdf with _mom0_sc instead
        # of the default stats calculation, which uses ppf.
        return self._mom0_sc(n)

    def _stats_skip(self):
        # skipped for now to force numerical integration of pdf for testing
        return self.mvsk
## copied from nonlinear_transform_gen.py
""" A class for the distribution of a non-linear monotonic transformation of a continuous random variable
simplest usage:
example: create log-gamma distribution, i.e. y = log(x),
where x is gamma distributed (also available in scipy.stats)
loggammaexpg = Transf_gen(stats.gamma, np.log, np.exp)
example: what is the distribution of the discount factor y=1/(1+x)
where interest rate x is normally distributed with N(mux, stdx**2)?
(just to come up with a story that implies a nice transformation)
invnormalg = Transf_gen(stats.norm, inversew, inversew_inv, decr=True, a=-np.inf)
This class does not work well for distributions with difficult shapes,
e.g. 1/x where x is standard normal, because of the singularity and jump at zero.
Note: I'm working from my version of scipy.stats.distribution.
But this script runs under scipy 0.6.0 (checked with numpy: 1.2.0rc2 and python 2.4)
This is not yet thoroughly tested, polished or optimized
TODO:
* numargs handling is not yet working properly, numargs needs to be specified (default = 0 or 1)
* feeding args and kwargs to underlying distribution is untested and incomplete
* distinguish args and kwargs for the transformed and the underlying distribution
- currently all args and no kwargs are transmitted to underlying distribution
- loc and scale only work for transformed, but not for underlying distribution
- possible to separate args for transformation and underlying distribution parameters
* add _rvs as method, will be faster in many cases
Created on Tuesday, October 28, 2008, 12:40:37 PM
Author: josef-pktd
License: BSD
"""
def get_u_argskwargs(**kwargs):
    """Extract the arguments meant for the underlying distribution.

    Keyword arguments whose name starts with ``u_`` are collected with that
    prefix removed. The positional arguments given as ``u_args`` are
    returned separately from the remaining keywords.

    Used by Transf_gen and TransfTwo_gen.

    Returns
    -------
    u_args : object or None
        Value passed as ``u_args``, None if it was not given.
    u_kwargs : dict
        Remaining ``u_`` keywords, keyed without the prefix.
    """
    u_kwargs = {
        key.replace("u_", "", 1): value
        for key, value in kwargs.items()
        if key.startswith("u_")
    }
    # The prefix has been stripped at this point, so ``u_args`` is stored
    # under "args".
    u_args = u_kwargs.pop("args", None)
    return u_args, u_kwargs
class Transf_gen(distributions.rv_continuous):
    """Distribution of a non-linear monotonic transformation of a
    continuous random variable.

    The transformed variable is ``y = func(x)`` where ``x`` follows the
    underlying distribution ``kls``.

    Parameters
    ----------
    kls : distribution instance
        Underlying distribution; its ``_cdf``, ``_ppf`` and ``_rvs``
        methods are used.
    func : callable
        The monotonic transformation.
    funcinv : callable
        Inverse of ``func``.
    numargs, name, longname, a, b : optional keywords
        Stored on the instance or forwarded to ``rv_continuous``.
    decr : bool, optional keyword
        True for a decreasing, False (default) for an increasing
        transformation.
    u_* : optional keywords
        Split off by ``get_u_argskwargs`` and stored as ``u_args`` and
        ``u_kwargs``.
    """

    def __init__(self, kls, func, funcinv, *args, **kwargs):
        self.func = func
        self.funcinv = funcinv
        # explicit for self.__dict__.update(kwargs)
        # need to set numargs because inspection does not work
        self.numargs = kwargs.pop("numargs", 0)
        name = kwargs.pop("name", "transfdist")
        longname = kwargs.pop(
            "longname", "Non-linear transformed distribution"
        )
        a = kwargs.pop("a", -np.inf)
        b = kwargs.pop("b", np.inf)
        # defines whether it is a decreasing (True)
        # or increasing (False) monotonic transformation
        self.decr = kwargs.pop("decr", False)

        self.u_args, self.u_kwargs = get_u_argskwargs(**kwargs)
        self.kls = kls  # (self.u_args, self.u_kwargs)
        # possible to freeze the underlying distribution

        # The shape parameters of the underlying distribution have to be
        # declared explicitly: the methods below only take *args, so
        # without ``shapes`` a positional shape argument would be parsed
        # as ``loc``.
        super(Transf_gen, self).__init__(
            a=a,
            b=b,
            name=name,
            shapes=getattr(kls, "shapes", None),
            longname=longname,
        )

    def _rvs(self, *args, size=None, random_state=None, **kwargs):
        # ``rvs`` passes size and random_state as keywords; draw from the
        # underlying distribution and map the draws forward with ``func``.
        return self.func(
            self.kls._rvs(*args, size=size, random_state=random_state)
        )

    def _cdf(self, x, *args, **kwargs):
        # note scipy _cdf only take *args not *kwargs
        if not self.decr:
            return self.kls._cdf(self.funcinv(x), *args, **kwargs)
        else:
            return 1.0 - self.kls._cdf(self.funcinv(x), *args, **kwargs)

    def _ppf(self, q, *args, **kwargs):
        if not self.decr:
            return self.func(self.kls._ppf(q, *args, **kwargs))
        else:
            # a decreasing transformation reverses the quantile order
            return self.func(self.kls._ppf(1 - q, *args, **kwargs))
def inverse(x):
    """Return the elementwise reciprocal 1 / x."""
    reciprocal = np.divide(1.0, x)
    return reciprocal


# Parameters used by inversew and inversew_inv.  An earlier setting of
# (0.05, 0.1) was immediately overridden by the values below.
mux, stdx = 9.0, 1.0


def inversew(x):
    """Return 1 / (1 + mux + x * stdx)."""
    denominator = 1 + mux + x * stdx
    return 1.0 / denominator


def inversew_inv(x):
    """Return (1 / x - 1 - mux) / stdx, the inverse mapping of inversew."""
    shifted = 1.0 / x - 1.0 - mux
    return shifted / stdx  # .np.divide(1.0,x)-10


def identit(x):
    """Return the argument unchanged."""
    return x
# Example instances of Transf_gen.

# Transformation y = inversew(x) of a normal variable.  inversew is
# 1 / (1 + mux + x * stdx), so it is flagged as decreasing (decr=True).
invdnormalg = Transf_gen(
    stats.norm,
    inversew,
    inversew_inv,
    decr=True,  # a=-np.inf,
    numargs=0,
    name="discf",
    longname="normal-based discount factor",
    # extradoc="\ndistribution of discount factor y=1/(1+x)) with x N(0.05,0.1**2)",
)
# Transformation y = exp(x) of a normal variable, with lower support bound 0.
# NOTE(review): numargs=2 although stats.norm is used without shape
# parameters here - confirm whether loc and scale were meant to be counted.
lognormalg = Transf_gen(
    stats.norm,
    np.exp,
    np.log,
    numargs=2,
    a=0,
    name="lnnorm",
    longname="Exp transformed normal",
    # extradoc="\ndistribution of y = exp(x), with x standard normal"
    # "precision for moment andstats is not very high, 2-3 decimals",
)
# Transformation y = log(x) of a gamma variable (one shape parameter).
loggammaexpg = Transf_gen(stats.gamma, np.log, np.exp, numargs=1)
## copied from nonlinear_transform_short.py
"""univariate distribution of a non-linear monotonic transformation of a
random variable
"""
class ExpTransf_gen(distributions.rv_continuous):
    """Distribution based on log/exp transformation

    The constructor is called with a distribution instance ``kls`` and
    generates the distribution of the transformed random variable
    ``y = exp(x)`` where ``x`` follows ``kls``.

    Parameters
    ----------
    kls : distribution instance
        Underlying distribution; its public ``cdf`` and ``ppf`` are used.
    numargs : int, optional keyword
        Stored as ``self.numargs``, default 1.
    name : str, optional keyword
        Name of the distribution.
    a : float, optional keyword
        Lower bound of the support, default 0.
    """

    def __init__(self, kls, *args, **kwargs):
        # explicit for self.__dict__.update(kwargs)
        self.numargs = kwargs.get("numargs", 1)
        # NOTE(review): the default name says "Log" although this is the
        # exp transformation - kept unchanged, confirm the intended label.
        name = kwargs.get("name", "Log transformed distribution")
        # honor a caller supplied lower bound instead of always using 0
        a = kwargs.get("a", 0)
        # Declare the shape parameters of the underlying distribution;
        # _cdf and _ppf only take *args, so without ``shapes`` a positional
        # shape argument would be parsed as ``loc``.
        super(ExpTransf_gen, self).__init__(
            a=a, name=name, shapes=getattr(kls, "shapes", None)
        )
        self.kls = kls

    def _cdf(self, x, *args):
        # P(exp(X) <= x) = P(X <= log(x))
        return self.kls.cdf(np.log(x), *args)

    def _ppf(self, q, *args):
        return np.exp(self.kls.ppf(q, *args))
class LogTransf_gen(distributions.rv_continuous):
    """Distribution based on log/exp transformation

    The constructor is called with a distribution instance ``kls`` and
    generates the distribution of the transformed random variable
    ``y = log(x)`` where ``x`` follows ``kls``.

    Parameters
    ----------
    kls : distribution instance
        Underlying distribution; its ``_cdf`` and ``_ppf`` are used.
    numargs : int, optional keyword
        Stored as ``self.numargs``, default 1.
    name : str, optional keyword
        Name of the distribution.
    a : float, optional keyword
        Lower bound of the support, default 0.
    """

    def __init__(self, kls, *args, **kwargs):
        # explicit for self.__dict__.update(kwargs)
        self.numargs = kwargs.get("numargs", 1)
        name = kwargs.get("name", "Log transformed distribution")
        # NOTE(review): a default lower bound of 0 cuts off the negative
        # values of log(x) for 0 < x < 1 - kept for compatibility, pass
        # a=-np.inf explicitly if the full real line is needed.
        a = kwargs.get("a", 0)
        # Declare the shape parameters of the underlying distribution;
        # _cdf and _ppf only take *args, so without ``shapes`` a positional
        # shape argument would be parsed as ``loc``.
        super(LogTransf_gen, self).__init__(
            a=a, name=name, shapes=getattr(kls, "shapes", None)
        )
        self.kls = kls

    def _cdf(self, x, *args):
        # P(log(X) <= x) = P(X <= exp(x))
        return self.kls._cdf(np.exp(x), *args)

    def _ppf(self, q, *args):
        return np.log(self.kls._ppf(q, *args))
## copied from transformtwo.py
"""
Created on Apr 28, 2009
@author: Josef Perktold
"""
""" A class for the distribution of a non-linear u-shaped or hump shaped transformation of a
continuous random variable
This is a companion to the distributions of non-linear monotonic transformation to the case
when the inverse mapping is a 2-valued correspondence, for example for absolute value or square
simplest usage:
example: create squared distribution, i.e. y = x**2,
where x is normal or t distributed
This class does not work well for distributions with difficult shapes,
e.g. 1/x where x is standard normal, because of the singularity and jump at zero.
This verifies for normal - chi2, normal - halfnorm, foldnorm, and t - F
TODO:
* numargs handling is not yet working properly,
numargs needs to be specified (default = 0 or 1)
* feeding args and kwargs to underlying distribution works in t distribution example
* distinguish args and kwargs for the transformed and the underlying distribution
- currently all args and no kwargs are transmitted to underlying distribution
- loc and scale only work for transformed, but not for underlying distribution
- possible to separate args for transformation and underlying distribution parameters
* add _rvs as method, will be faster in many cases
"""
class TransfTwo_gen(distributions.rv_continuous):
    """Distribution based on a non-monotonic (u- or hump-shaped transformation)

    The constructor is called with a distribution instance and functions
    that define the non-linear transformation, and generates the
    distribution of the transformed random variable.

    Note: the transformation, its inverse and derivatives need to be fully
    specified: func, funcinvplus, funcinvminus, derivplus, derivminus.
    Currently no numerical derivatives or inverse are calculated.

    This can be used to generate distribution instances similar to the
    distributions in scipy.stats.

    Parameters
    ----------
    kls : distribution instance
        Underlying distribution; ``kls.shapes`` is reused as shapes.
    func : callable
        The transformation.
    funcinvplus, funcinvminus : callable
        The two branches of the inverse of ``func``.
    derivplus, derivminus : callable
        Derivatives of the two inverse branches.
    shape : {'u', 'hump'}, keyword
        Type of the transformation. The default False is not a valid
        shape; ``_pdf``, ``_cdf`` and ``_sf`` raise ValueError for it.
    numargs, name, longname, a, b : optional keywords
        Stored on the instance or forwarded to ``rv_continuous``.
    u_* : optional keywords
        Split off by ``get_u_argskwargs`` and stored as ``u_args`` and
        ``u_kwargs``.
    """

    # a class for non-linear non-monotonic transformation of a continuous
    # random variable
    def __init__(
        self,
        kls,
        func,
        funcinvplus,
        funcinvminus,
        derivplus,
        derivminus,
        *args,
        **kwargs
    ):
        self.func = func
        self.funcinvplus = funcinvplus
        self.funcinvminus = funcinvminus
        self.derivplus = derivplus
        self.derivminus = derivminus
        # explicit for self.__dict__.update(kwargs)
        # need to set numargs because inspection does not work
        self.numargs = kwargs.pop("numargs", 0)
        name = kwargs.pop("name", "transfdist")
        longname = kwargs.pop(
            "longname", "Non-linear transformed distribution"
        )
        a = kwargs.pop("a", -np.inf)  # attached to self in super
        b = kwargs.pop("b", np.inf)  # self.a, self.b would be overwritten
        # defines whether it is a `u` shaped or `hump' shaped
        # transformation
        self.shape = kwargs.pop("shape", False)

        self.u_args, self.u_kwargs = get_u_argskwargs(**kwargs)
        self.kls = kls  # (self.u_args, self.u_kwargs)
        # possible to freeze the underlying distribution

        super(TransfTwo_gen, self).__init__(
            a=a,
            b=b,
            name=name,
            shapes=kls.shapes,
            longname=longname,
        )

        # add enough info for self.freeze() to be able to reconstruct the
        # instance
        self._ctor_param.update(
            dict(
                kls=kls,
                func=func,
                funcinvplus=funcinvplus,
                funcinvminus=funcinvminus,
                derivplus=derivplus,
                derivminus=derivminus,
                shape=self.shape,
            )
        )

    def _rvs(self, *args, size=None, random_state=None):
        # ``rvs`` passes size and random_state as keywords; draw from the
        # underlying distribution and transform the draws.
        return self.func(
            self.kls._rvs(*args, size=size, random_state=random_state)
        )

    def _pdf(self, x, *args, **kwargs):
        if self.shape == "u":
            signpdf = 1
        elif self.shape == "hump":
            signpdf = -1
        else:
            raise ValueError("shape can only be `u` or `hump`")

        # note scipy _cdf only take *args not *kwargs
        return signpdf * (
            self.derivplus(x)
            * self.kls._pdf(self.funcinvplus(x), *args, **kwargs)
            - self.derivminus(x)
            * self.kls._pdf(self.funcinvminus(x), *args, **kwargs)
        )

    def _cdf(self, x, *args, **kwargs):
        # note scipy _cdf only take *args not *kwargs
        if self.shape == "u":
            return self.kls._cdf(
                self.funcinvplus(x), *args, **kwargs
            ) - self.kls._cdf(self.funcinvminus(x), *args, **kwargs)
        elif self.shape == "hump":
            return 1.0 - self._sf(x, *args, **kwargs)
        # Any other shape would make _cdf and _sf call each other forever.
        raise ValueError("shape can only be `u` or `hump`")

    def _sf(self, x, *args, **kwargs):
        # note scipy _cdf only take *args not *kwargs
        if self.shape == "hump":
            return self.kls._cdf(
                self.funcinvplus(x), *args, **kwargs
            ) - self.kls._cdf(self.funcinvminus(x), *args, **kwargs)
        elif self.shape == "u":
            return 1.0 - self._cdf(x, *args, **kwargs)
        # Any other shape would make _cdf and _sf call each other forever.
        raise ValueError("shape can only be `u` or `hump`")

    def _munp(self, n, *args, **kwargs):
        # moments by numerical integration of the pdf
        return self._mom0_sc(n, *args)
# ppf might not be possible in general case?
# should be possible in symmetric case
# def _ppf(self, q, *args, **kwargs):
# if self.shape == 'u':
# return self.func(self.kls._ppf(q,*args, **kwargs))
# elif self.shape == 'hump':
# return self.func(self.kls._ppf(1-q,*args, **kwargs))
# TODO: rename these functions to have unique names
class SquareFunc(object):
    """Quadratic function y = x**2 bundled with the two branches of its
    inverse and their derivatives.

    Instance methods are used instead of class methods to leave room for
    an extension to a parametrized function.
    """

    def inverseplus(self, x):
        # positive branch of the inverse
        root = np.sqrt(x)
        return root

    def inverseminus(self, x):
        # negative branch of the inverse
        root = np.sqrt(x)
        return 0.0 - root

    def derivplus(self, x):
        # derivative of the positive inverse branch
        root = np.sqrt(x)
        return 0.5 / root

    def derivminus(self, x):
        # derivative of the negative inverse branch
        slope = 0.5 / np.sqrt(x)
        return 0.0 - slope

    def squarefunc(self, x):
        # the transformation itself
        squared = np.power(x, 2)
        return squared


sqfunc = SquareFunc()
# Distribution of y = x**2 built from stats.norm with the u-shaped square
# transformation; the support is set to [0, inf).
squarenormalg = TransfTwo_gen(
    stats.norm,
    sqfunc.squarefunc,
    sqfunc.inverseplus,
    sqfunc.inverseminus,
    sqfunc.derivplus,
    sqfunc.derivminus,
    shape="u",
    a=0.0,
    b=np.inf,
    numargs=0,
    name="squarenorm",
    longname="squared normal distribution",
    # extradoc="\ndistribution of the square of a normal random variable"
    # + " y=x**2 with x N(0.0,1)",
)
# u_loc=l, u_scale=s)
# Distribution of y = x**2 built from stats.t (one shape parameter).
# NOTE(review): name repeats "squarenorm" from the instance above although
# longname says "squared t distribution" - looks like a copy-paste
# leftover, confirm before relying on ``name``.
squaretg = TransfTwo_gen(
    stats.t,
    sqfunc.squarefunc,
    sqfunc.inverseplus,
    sqfunc.inverseminus,
    sqfunc.derivplus,
    sqfunc.derivminus,
    shape="u",
    a=0.0,
    b=np.inf,
    numargs=1,
    name="squarenorm",
    longname="squared t distribution",
    # extradoc="\ndistribution of the square of a t random variable"
    # + " y=x**2 with x t(dof,0.0,1)",
)
def inverseplus(x):
    """Positive inverse branch of y = -x**2: sqrt(-x)."""
    root = np.sqrt(-x)
    return root


def inverseminus(x):
    """Negative inverse branch of y = -x**2: -sqrt(-x)."""
    root = np.sqrt(-x)
    return 0.0 - root


def derivplus(x):
    """Derivative of the positive inverse branch: -0.5 / sqrt(-x)."""
    slope = 0.5 / np.sqrt(-x)
    return 0.0 - slope


def derivminus(x):
    """Derivative of the negative inverse branch: 0.5 / sqrt(-x)."""
    root = np.sqrt(-x)
    return 0.5 / root


def negsquarefunc(x):
    """Return the negative square -x**2."""
    squared = np.power(x, 2)
    return -squared
# Distribution of y = -x**2 built from stats.norm with the hump-shaped
# negative square transformation; the support is set to (-inf, 0].
# The helper functions are bound here, before their names are reused
# further down in the module.
negsquarenormalg = TransfTwo_gen(
    stats.norm,
    negsquarefunc,
    inverseplus,
    inverseminus,
    derivplus,
    derivminus,
    shape="hump",
    a=-np.inf,
    b=0.0,
    numargs=0,
    name="negsquarenorm",
    longname="negative squared normal distribution",
    # extradoc="\ndistribution of the negative square of a normal random variable"
    # + " y=-x**2 with x N(0.0,1)",
)
# u_loc=l, u_scale=s)
def inverseplus(x):
    """Positive inverse branch of y = abs(x): x itself."""
    return x


def inverseminus(x):
    """Negative inverse branch of y = abs(x): -x."""
    negated = 0.0 - x
    return negated


def derivplus(x):
    """Derivative of the positive inverse branch, the constant 1."""
    return 1.0


def derivminus(x):
    """Derivative of the negative inverse branch, the constant -1."""
    return 0.0 - 1.0


def absfunc(x):
    """Return the absolute value of x."""
    magnitude = np.abs(x)
    return magnitude
# Distribution of y = abs(x) built from stats.norm with the u-shaped
# absolute value transformation; the support is set to [0, inf).
# np.abs is passed directly as the transformation.
absnormalg = TransfTwo_gen(
    stats.norm,
    np.abs,
    inverseplus,
    inverseminus,
    derivplus,
    derivminus,
    shape="u",
    a=0.0,
    b=np.inf,
    numargs=0,
    name="absnorm",
    longname="absolute of normal distribution",
    # extradoc="\ndistribution of the absolute value of a normal random variable"
    # + " y=abs(x) with x N(0,1)",
)
# copied from mvncdf.py
"""multivariate normal probabilities and cumulative distribution function
a wrapper for scipy.stats._mvn.mvndst
SUBROUTINE MVNDST( N, LOWER, UPPER, INFIN, CORREL, MAXPTS,
& ABSEPS, RELEPS, ERROR, VALUE, INFORM )
*
* A subroutine for computing multivariate normal probabilities.
* This subroutine uses an algorithm given in the paper
* "Numerical Computation of Multivariate Normal Probabilities", in
* J. of Computational and Graphical Stat., 1(1992), pp. 141-149, by
* Alan Genz
* Department of Mathematics
* Washington State University
* Pullman, WA 99164-3113
* Email : AlanGenz@wsu.edu
*
* Parameters
*
* N INTEGER, the number of variables.
* LOWER REAL, array of lower integration limits.
* UPPER REAL, array of upper integration limits.
* INFIN INTEGER, array of integration limits flags:
* if INFIN(I) < 0, Ith limits are (-infinity, infinity);
* if INFIN(I) = 0, Ith limits are (-infinity, UPPER(I)];
* if INFIN(I) = 1, Ith limits are [LOWER(I), infinity);
* if INFIN(I) = 2, Ith limits are [LOWER(I), UPPER(I)].
* CORREL REAL, array of correlation coefficients; the correlation
* coefficient in row I column J of the correlation matrix
* should be stored in CORREL( J + ((I-2)*(I-1))/2 ), for J < I.
* The correlation matrix must be positive semidefinite.
* MAXPTS INTEGER, maximum number of function values allowed. This
* parameter can be used to limit the time. A sensible
* strategy is to start with MAXPTS = 1000*N, and then
* increase MAXPTS if ERROR is too large.
* ABSEPS REAL absolute error tolerance.
* RELEPS REAL relative error tolerance.
* ERROR REAL estimated absolute error, with 99% confidence level.
* VALUE REAL estimated value for the integral
* INFORM INTEGER, termination status parameter:
* if INFORM = 0, normal completion with ERROR < EPS;
* if INFORM = 1, completion with ERROR > EPS and MAXPTS
* function values used; increase MAXPTS to
* decrease ERROR;
* if INFORM = 2, N > 500 or N < 1.
*
>>> mvndst([0.0,0.0],[10.0,10.0],[0,0],[0.5])
(2e-016, 1.0, 0)
>>> mvndst([0.0,0.0],[100.0,100.0],[0,0],[0.0])
(2e-016, 1.0, 0)
>>> mvndst([0.0,0.0],[1.0,1.0],[0,0],[0.0])
(2e-016, 0.70786098173714096, 0)
>>> mvndst([0.0,0.0],[0.001,1.0],[0,0],[0.0])
(2e-016, 0.42100802096993045, 0)
>>> mvndst([0.0,0.0],[0.001,10.0],[0,0],[0.0])
(2e-016, 0.50039894221391101, 0)
>>> mvndst([0.0,0.0],[0.001,100.0],[0,0],[0.0])
(2e-016, 0.50039894221391101, 0)
>>> mvndst([0.0,0.0],[0.01,100.0],[0,0],[0.0])
(2e-016, 0.5039893563146316, 0)
>>> mvndst([0.0,0.0],[0.1,100.0],[0,0],[0.0])
(2e-016, 0.53982783727702899, 0)
>>> mvndst([0.0,0.0],[0.1,100.0],[2,2],[0.0])
(2e-016, 0.019913918638514494, 0)
>>> mvndst([0.0,0.0],[0.0,0.0],[0,0],[0.0])
(2e-016, 0.25, 0)
>>> mvndst([0.0,0.0],[0.0,0.0],[-1,0],[0.0])
(2e-016, 0.5, 0)
>>> mvndst([0.0,0.0],[0.0,0.0],[-1,0],[0.5])
(2e-016, 0.5, 0)
>>> mvndst([0.0,0.0],[0.0,0.0],[0,0],[0.5])
(2e-016, 0.33333333333333337, 0)
>>> mvndst([0.0,0.0],[0.0,0.0],[0,0],[0.99])
(2e-016, 0.47747329317779391, 0)
"""
# Descriptions of the INFORM termination status returned by mvndst, keyed
# by status code; used by mvstdnormcdf to report a non-zero status.
informcode = {
    0: "normal completion with ERROR < EPS",
    1: """completion with ERROR > EPS and MAXPTS function values used;
increase MAXPTS to decrease ERROR;""",
    2: "N > 500 or N < 1",
}
def mvstdnormcdf(lower, upper, corrcoef, **kwds):
    """standardized multivariate normal cumulative distribution function

    This is a wrapper for scipy.stats._mvn.mvndst which calculates
    a rectangular integral over a standardized multivariate normal
    distribution.

    This function assumes standardized scale, that is the variance in each
    dimension is one, but correlation can be arbitrary,
    covariance = correlation matrix

    Parameters
    ----------
    lower, upper : array_like, 1d
        lower and upper integration limits with length equal to the number
        of dimensions of the multivariate normal distribution. It can
        contain -np.inf or np.inf for open integration intervals
    corrcoef : float or array_like
        specifies correlation matrix in one of three ways, see notes
    optional keyword parameters to influence integration
        * maxpts : int, maximum number of function values allowed. This
             parameter can be used to limit the time. A sensible
             strategy is to start with `maxpts` = 1000*N, and then
             increase `maxpts` if ERROR is too large. If it is not given
             and there are more than two dimensions, 10000*N is used.
        * abseps : float absolute error tolerance.
        * releps : float relative error tolerance.

    Returns
    -------
    cdfvalue : float
        value of the integral

    Raises
    ------
    ValueError
        If the bounds are not one-dimensional, have different lengths, or
        if corrcoef does not match one of the three accepted forms.

    Notes
    -----
    The correlation matrix corrcoef can be given in 3 different ways.
    If the multivariate normal is two-dimensional then only the
    correlation coefficient needs to be provided.
    For general dimension the correlation matrix can be provided either
    as a one-dimensional array of the n*(n-1)/2 correlation coefficients,
    which is handed to mvndst unchanged, or as a full square correlation
    matrix, from which the strict lower triangle is taken row by row.

    A non-zero termination status of mvndst is reported with a printed
    message; the estimate is returned nevertheless.

    See Also
    --------
    mvnormcdf : cdf of multivariate normal distribution without
        standardization

    Examples
    --------
    >>> print(mvstdnormcdf([-np.inf,-np.inf], [0.0,np.inf], 0.5))
    0.5
    >>> corr = [[1.0, 0, 0.5],[0,1,0],[0.5,0,1]]
    >>> print(mvstdnormcdf([-np.inf,-np.inf,-100.0], [0.0,0.0,0.0], corr, abseps=1e-6))
    0.166666399198
    >>> print(mvstdnormcdf([-np.inf,-np.inf,-100.0],[0.0,0.0,0.0],corr, abseps=1e-8))
    something wrong completion with ERROR > EPS and MAXPTS function values used;
                        increase MAXPTS to decrease ERROR; 1.048330348e-006
    0.166666546218
    >>> print(mvstdnormcdf([-np.inf,-np.inf,-100.0],[0.0,0.0,0.0], corr,
    ...                    maxpts=100000, abseps=1e-8))
    0.166666588293
    """
    n = len(lower)
    # converting to arrays makes the ndim checks possible
    lower = np.array(lower)
    upper = np.array(upper)
    corrcoef = np.array(corrcoef)

    if not (lower.ndim == 1 and upper.ndim == 1):
        raise ValueError("can handle only 1D bounds")
    if len(upper) != n:
        raise ValueError("bounds have different lengths")

    n_pairs = n * (n - 1) / 2.0
    scalar_rho = n == 2 and corrcoef.size == 1
    if scalar_rho or (corrcoef.ndim == 1 and len(corrcoef) == n_pairs):
        # single coefficient or already flattened coefficients
        correl = corrcoef
    elif corrcoef.shape == (n, n):
        # full matrix: keep the strict lower triangle
        correl = corrcoef[np.tril_indices(n, -1)]
    else:
        raise ValueError("corrcoef has incorrect dimension")

    if n > 2 and "maxpts" not in kwds:
        kwds["maxpts"] = 10000 * n

    # integration limit flags: 2 both limits finite, 0 no lower limit,
    # 1 no upper limit, -1 no limit at all
    lowinf = np.isneginf(lower)
    uppinf = np.isposinf(upper)
    infin = np.full(n, 2.0)
    infin[lowinf] = 0
    infin[uppinf] = 1
    # this has to be last
    infin[lowinf & uppinf] = -1

    error, cdfvalue, inform = mvndst(lower, upper, infin, correl, **kwds)
    if inform:
        print("something wrong", informcode[inform], error)
    return cdfvalue
def mvnormcdf(upper, mu, cov, lower=None, **kwds):
    """
    Multivariate normal cumulative distribution function

    This is a wrapper for scipy.stats._mvn.mvndst which calculates
    a rectangular integral over a multivariate normal distribution.

    Parameters
    ----------
    lower, upper : array_like, 1d
        lower and upper integration limits with length equal to the number
        of dimensions of the multivariate normal distribution. It can
        contain -np.inf or np.inf for open integration intervals. If
        lower is None, -np.inf is used in every dimension.
    mu : array_like, 1d
        list or array of means
    cov : array_like, 2d
        specifies covariance matrix
    optional keyword parameters to influence integration
        * maxpts : int, maximum number of function values allowed. This
             parameter can be used to limit the time. A sensible
             strategy is to start with `maxpts` = 1000*N, and then
             increase `maxpts` if ERROR is too large.
        * abseps : float absolute error tolerance.
        * releps : float relative error tolerance.

    Returns
    -------
    cdfvalue : float
        value of the integral

    Notes
    -----
    This function normalizes the location and scale of the multivariate
    normal distribution and then uses `mvstdnormcdf` to call the
    integration.

    See Also
    --------
    mvstdnormcdf : location and scale standardized multivariate normal cdf
    """
    upper = np.array(upper)
    if lower is not None:
        lower = np.array(lower)
    else:
        # no lower limit in any dimension
        lower = np.full(upper.shape, -np.inf)
    cov = np.array(cov)

    # standard deviation vector
    stdev = np.sqrt(np.diag(cov))
    std_lower = (lower - mu) / stdev
    std_upper = (upper - mu) / stdev

    # scale rows and columns of the covariance to obtain the correlation
    stdev_row = np.atleast_2d(stdev)
    corr = cov / stdev_row / stdev_row.T

    return mvstdnormcdf(std_lower, std_upper, corr, **kwds)