Source code for statsmodels.stats.oneway
"""
Created on Wed Mar 18 10:33:38 2020
Author: Josef Perktold
License: BSD-3
"""
import numpy as np
from scipy import stats
from scipy.special import ncfdtrinc
# functions that use scipy.special instead of boost based function in stats
from statsmodels.stats.power import ncf_cdf, ncf_ppf
from statsmodels.stats.robust_compare import TrimmedMean, scale_transform
from statsmodels.tools.testing import Holder
from statsmodels.stats.base import HolderTuple
[docs]
def effectsize_oneway(means, vars_, nobs, use_var="unequal", ddof_between=0):
"""
Effect size corresponding to Cohen's f = nc / nobs for oneway anova
This contains adjustment for Welch and Brown-Forsythe Anova so that
effect size can be used with FTestAnovaPower.
Parameters
----------
means : array_like
Mean of samples to be compared
vars_ : float or array_like
Residual (within) variance of each sample or pooled
If ``vars_`` is scalar, then it is interpreted as pooled variance that
is the same for all samples, ``use_var`` will be ignored.
Otherwise, the variances are used depending on the ``use_var`` keyword.
nobs : int or array_like
Number of observations for the samples.
If nobs is scalar, then it is assumed that all samples have the same
number ``nobs`` of observation, i.e. a balanced sample case.
Otherwise, statistics will be weighted corresponding to nobs.
Only relative sizes are relevant, any proportional change to nobs does
not change the effect size.
use_var : {"unequal", "equal", "bf"}
If ``use_var`` is "unequal", then the variances can differ across
samples and the effect size for Welch anova will be computed.
ddof_between : int
Degrees of freedom correction for the weighted between sum of squares.
The denominator is ``nobs_total - ddof_between``
This can be used to match differences across reference literature.
Returns
-------
f2 : float
Effect size corresponding to squared Cohen's f, which is also equal
to the noncentrality divided by total number of observations.
Notes
-----
This currently handles the following cases for oneway anova
- balanced sample with homoscedastic variances
- samples with different number of observations and with homoscedastic
variances
- samples with different number of observations and with heteroskedastic
variances. This corresponds to Welch anova
In the case of "unequal" and "bf" methods for unequal variances, the
effect sizes do not directly correspond to the test statistic in Anova.
Both have correction terms dropped or added, so the effect sizes match up
with using FTestAnovaPower.
If all variances are equal, then all three methods result in the same
effect size. If variances are unequal, then the three methods produce
small differences in effect size.
Note, the effect size and power computation for BF Anova was not found in
the literature. The correction terms were added so that FTestAnovaPower
provides a good approximation to the power.
Status: experimental
We might add additional returns, if those are needed to support power
and sample size applications.
Examples
--------
The following shows how to compute effect size and power for each of the
three anova methods. The null hypothesis is that the means are equal which
corresponds to a zero effect size. Under the alternative, means differ
with two sample means at a distance delta from the mean. We assume the
variance is the same under the null and alternative hypothesis.
``nobs`` for the samples defines the fraction of observations in the
samples. ``nobs`` in the power method defines the total sample size.
In simulations, the computed power for standard anova,
i.e.``use_var="equal"`` overestimates the simulated power by a few percent.
The equal variance assumption does not hold in this example.
>>> from statsmodels.stats.oneway import effectsize_oneway
>>> from statsmodels.stats.power import FTestAnovaPower
>>>
>>> nobs = np.array([10, 12, 13, 15])
>>> delta = 0.5
>>> means_alt = np.array([-1, 0, 0, 1]) * delta
>>> vars_ = np.arange(1, len(means_alt) + 1)
>>>
>>> f2_alt = effectsize_oneway(means_alt, vars_, nobs, use_var="equal")
>>> f2_alt
0.04581300813008131
>>>
>>> kwds = {'effect_size': np.sqrt(f2_alt), 'nobs': 100, 'alpha': 0.05,
... 'k_groups': 4}
>>> power = FTestAnovaPower().power(**kwds)
>>> power
0.39165892158983273
>>>
>>> f2_alt = effectsize_oneway(means_alt, vars_, nobs, use_var="unequal")
>>> f2_alt
0.060640138408304504
>>>
>>> kwds['effect_size'] = np.sqrt(f2_alt)
>>> power = FTestAnovaPower().power(**kwds)
>>> power
0.5047366512800622
>>>
>>> f2_alt = effectsize_oneway(means_alt, vars_, nobs, use_var="bf")
>>> f2_alt
0.04391324307956788
>>>
>>> kwds['effect_size'] = np.sqrt(f2_alt)
>>> power = FTestAnovaPower().power(**kwds)
>>> power
0.3765792117047725
"""
# the code here is largely a copy of onway_generic with adjustments
means = np.asarray(means)
n_groups = means.shape[0]
if np.size(nobs) == 1:
nobs = np.ones(n_groups) * nobs
nobs_t = nobs.sum()
if use_var == "equal":
if np.size(vars_) == 1:
var_resid = vars_
else:
vars_ = np.asarray(vars_)
var_resid = ((nobs - 1) * vars_).sum() / (nobs_t - n_groups)
vars_ = var_resid # scalar, if broadcasting works
weights = nobs / vars_
w_total = weights.sum()
w_rel = weights / w_total
# meanw_t = (weights * means).sum() / w_total
meanw_t = w_rel @ means
f2 = np.dot(weights, (means - meanw_t)**2) / (nobs_t - ddof_between)
if use_var.lower() == "bf":
weights = nobs
w_total = weights.sum()
w_rel = weights / w_total
meanw_t = w_rel @ means
# TODO: reuse general case with weights
tmp = ((1. - nobs / nobs_t) * vars_).sum()
statistic = 1. * (nobs * (means - meanw_t)**2).sum()
statistic /= tmp
f2 = statistic * (1. - nobs / nobs_t).sum() / nobs_t
# correction factor for df_num in BFM
df_num2 = n_groups - 1
df_num = tmp**2 / ((vars_**2).sum() +
(nobs / nobs_t * vars_).sum()**2 -
2 * (nobs / nobs_t * vars_**2).sum())
f2 *= df_num / df_num2
return f2
[docs]
def convert_effectsize_fsqu(f2=None, eta2=None):
"""Convert squared effect sizes in f family
f2 is signal to noise ratio, var_explained / var_residual
eta2 is proportion of explained variance, var_explained / var_total
uses the relationship:
f2 = eta2 / (1 - eta2)
Parameters
----------
f2 : None or float
Squared Cohen's F effect size. If f2 is not None, then eta2 will be
computed.
eta2 : None or float
Squared eta effect size. If f2 is None and eta2 is not None, then f2 is
computed.
Returns
-------
res : Holder instance
An instance of the Holder class with f2 and eta2 as attributes.
"""
if f2 is not None:
eta2 = 1 / (1 + 1 / f2)
elif eta2 is not None:
f2 = eta2 / (1 - eta2)
res = Holder(f2=f2, eta2=eta2)
return res
[docs]
def _fstat2effectsize(f_stat, df):
"""Compute anova effect size from F-statistic
This might be combined with convert_effectsize_fsqu
Parameters
----------
f_stat : array_like
Test statistic of an F-test
df : tuple
degrees of freedom ``df = (df1, df2)`` where
- df1 : numerator degrees of freedom, number of constraints
- df2 : denominator degrees of freedom, df_resid
Returns
-------
res : Holder instance
This instance contains effect size measures f2, eta2, omega2 and eps2
as attributes.
Notes
-----
This uses the following definitions:
- f2 = f_stat * df1 / df2
- eta2 = f2 / (f2 + 1)
- omega2 = (f2 - df1 / df2) / (f2 + 2)
- eps2 = (f2 - df1 / df2) / (f2 + 1)
This differs from effect size measures in other function which define
``f2 = f_stat * df1 / nobs``
or an equivalent expression for power computation. The noncentrality
index for the hypothesis test is in those cases given by
``nc = f_stat * df1``.
Currently omega2 and eps2 are computed in two different ways. Those
values agree for regular cases but can show different behavior in corner
cases (e.g. zero division).
"""
df1, df2 = df
f2 = f_stat * df1 / df2
eta2 = f2 / (f2 + 1)
omega2_ = (f_stat - 1) / (f_stat + (df2 + 1) / df1)
omega2 = (f2 - df1 / df2) / (f2 + 1 + 1 / df2) # rewrite
eps2_ = (f_stat - 1) / (f_stat + df2 / df1)
eps2 = (f2 - df1 / df2) / (f2 + 1) # rewrite
return Holder(f2=f2, eta2=eta2, omega2=omega2, eps2=eps2, eps2_=eps2_,
omega2_=omega2_)
# conversion functions for Wellek's equivalence effect size
# these are mainly to compare with literature
[docs]
def wellek_to_f2(eps, n_groups):
"""Convert Wellek's effect size (sqrt) to Cohen's f-squared
This computes the following effect size :
f2 = 1 / n_groups * eps**2
Parameters
----------
eps : float or ndarray
Wellek's effect size used in anova equivalence test
n_groups : int
Number of groups in oneway comparison
Returns
-------
f2 : effect size Cohen's f-squared
"""
f2 = 1 / n_groups * eps**2
return f2
[docs]
def f2_to_wellek(f2, n_groups):
"""Convert Cohen's f-squared to Wellek's effect size (sqrt)
This computes the following effect size :
eps = sqrt(n_groups * f2)
Parameters
----------
f2 : float or ndarray
Effect size Cohen's f-squared
n_groups : int
Number of groups in oneway comparison
Returns
-------
eps : float or ndarray
Wellek's effect size used in anova equivalence test
"""
eps = np.sqrt(n_groups * f2)
return eps
[docs]
def fstat_to_wellek(f_stat, n_groups, nobs_mean):
"""Convert F statistic to wellek's effect size eps squared
This computes the following effect size :
es = f_stat * (n_groups - 1) / nobs_mean
Parameters
----------
f_stat : float or ndarray
Test statistic of an F-test.
n_groups : int
Number of groups in oneway comparison
nobs_mean : float or ndarray
Average number of observations across groups.
Returns
-------
eps : float or ndarray
Wellek's effect size used in anova equivalence test
"""
es = f_stat * (n_groups - 1) / nobs_mean
return es
[docs]
def confint_noncentrality(f_stat, df, alpha=0.05,
alternative="two-sided"):
"""
Confidence interval for noncentrality parameter in F-test
This does not yet handle non-negativity constraint on nc.
Currently only two-sided alternative is supported.
Parameters
----------
f_stat : float
df : tuple
degrees of freedom ``df = (df1, df2)`` where
- df1 : numerator degrees of freedom, number of constraints
- df2 : denominator degrees of freedom, df_resid
alpha : float, default 0.05
alternative : {"two-sided"}
Other alternatives have not been implements.
Returns
-------
float
The end point of the confidence interval.
Notes
-----
The algorithm inverts the cdf of the noncentral F distribution with
respect to the noncentrality parameters.
See Steiger 2004 and references cited in it.
References
----------
.. [1] Steiger, James H. 2004. “Beyond the F Test: Effect Size Confidence
Intervals and Tests of Close Fit in the Analysis of Variance and
Contrast Analysis.” Psychological Methods 9 (2): 164–82.
https://doi.org/10.1037/1082-989X.9.2.164.
See Also
--------
confint_effectsize_oneway
"""
df1, df2 = df
if alternative in ["two-sided", "2s", "ts"]:
alpha1s = alpha / 2
ci = ncfdtrinc(df1, df2, [1 - alpha1s, alpha1s], f_stat)
else:
raise NotImplementedError
return ci
[docs]
def confint_effectsize_oneway(f_stat, df, alpha=0.05, nobs=None):
"""
Confidence interval for effect size in oneway anova for F distribution
This does not yet handle non-negativity constraint on nc.
Currently only two-sided alternative is supported.
Parameters
----------
f_stat : float
df : tuple
degrees of freedom ``df = (df1, df2)`` where
- df1 : numerator degrees of freedom, number of constraints
- df2 : denominator degrees of freedom, df_resid
alpha : float, default 0.05
nobs : int, default None
Returns
-------
Holder
Class with effect size and confidence attributes
Notes
-----
The confidence interval for the noncentrality parameter is obtained by
inverting the cdf of the noncentral F distribution. Confidence intervals
for other effect sizes are computed by endpoint transformation.
R package ``effectsize`` does not compute the confidence intervals in the
same way. Their confidence intervals can be replicated with
>>> ci_nc = confint_noncentrality(f_stat, df1, df2, alpha=0.1)
>>> ci_es = smo._fstat2effectsize(ci_nc / df1, df1, df2)
See Also
--------
confint_noncentrality
"""
df1, df2 = df
if nobs is None:
nobs = df1 + df2 + 1
ci_nc = confint_noncentrality(f_stat, df, alpha=alpha)
ci_f2 = ci_nc / nobs
ci_res = convert_effectsize_fsqu(f2=ci_f2)
ci_res.ci_omega2 = (ci_f2 - df1 / df2) / (ci_f2 + 1 + 1 / df2)
ci_res.ci_nc = ci_nc
ci_res.ci_f = np.sqrt(ci_res.f2)
ci_res.ci_eta = np.sqrt(ci_res.eta2)
ci_res.ci_f_corrected = np.sqrt(ci_res.f2 * (df1 + 1) / df1)
return ci_res
[docs]
def anova_generic(means, variances, nobs, use_var="unequal",
welch_correction=True, info=None):
"""
Oneway Anova based on summary statistics
Parameters
----------
means : array_like
Mean of samples to be compared
variances : float or array_like
Residual (within) variance of each sample or pooled.
If ``variances`` is scalar, then it is interpreted as pooled variance
that is the same for all samples, ``use_var`` will be ignored.
Otherwise, the variances are used depending on the ``use_var`` keyword.
nobs : int or array_like
Number of observations for the samples.
If nobs is scalar, then it is assumed that all samples have the same
number ``nobs`` of observation, i.e. a balanced sample case.
Otherwise, statistics will be weighted corresponding to nobs.
Only relative sizes are relevant, any proportional change to nobs does
not change the effect size.
use_var : {"unequal", "equal", "bf"}
If ``use_var`` is "unequal", then the variances can differ across
samples and the effect size for Welch anova will be computed.
welch_correction : bool
If this is false, then the Welch correction to the test statistic is
not included. This allows the computation of an effect size measure
that corresponds more closely to Cohen's f.
info : not used yet
Returns
-------
res : results instance
This includes `statistic` and `pvalue`.
"""
options = {"use_var": use_var,
"welch_correction": welch_correction
}
if means.ndim != 1:
raise ValueError('data (means, ...) has to be one-dimensional')
nobs_t = nobs.sum()
n_groups = len(means)
# mean_t = (nobs * means).sum() / nobs_t
if use_var == "unequal":
weights = nobs / variances
else:
weights = nobs
w_total = weights.sum()
w_rel = weights / w_total
# meanw_t = (weights * means).sum() / w_total
meanw_t = w_rel @ means
statistic = np.dot(weights, (means - meanw_t)**2) / (n_groups - 1.)
df_num = n_groups - 1.
if use_var == "unequal":
tmp = ((1 - w_rel)**2 / (nobs - 1)).sum() / (n_groups**2 - 1)
if welch_correction:
statistic /= 1 + 2 * (n_groups - 2) * tmp
df_denom = 1. / (3. * tmp)
elif use_var == "equal":
# variance of group demeaned total sample, pooled var_resid
tmp = ((nobs - 1) * variances).sum() / (nobs_t - n_groups)
statistic /= tmp
df_denom = nobs_t - n_groups
elif use_var == "bf":
tmp = ((1. - nobs / nobs_t) * variances).sum()
statistic = 1. * (nobs * (means - meanw_t)**2).sum()
statistic /= tmp
df_num2 = n_groups - 1
df_denom = tmp**2 / ((1. - nobs / nobs_t) ** 2 *
variances ** 2 / (nobs - 1)).sum()
df_num = tmp**2 / ((variances ** 2).sum() +
(nobs / nobs_t * variances).sum() ** 2 -
2 * (nobs / nobs_t * variances ** 2).sum())
pval2 = stats.f.sf(statistic, df_num2, df_denom)
options["df2"] = (df_num2, df_denom)
options["df_num2"] = df_num2
options["pvalue2"] = pval2
else:
raise ValueError('use_var is to be one of "unequal", "equal" or "bf"')
pval = stats.f.sf(statistic, df_num, df_denom)
res = HolderTuple(statistic=statistic,
pvalue=pval,
df=(df_num, df_denom),
df_num=df_num,
df_denom=df_denom,
nobs_t=nobs_t,
n_groups=n_groups,
means=means,
nobs=nobs,
vars_=variances,
**options
)
return res
[docs]
def anova_oneway(data, groups=None, use_var="unequal", welch_correction=True,
trim_frac=0):
"""Oneway Anova
This implements standard anova, Welch and Brown-Forsythe, and trimmed
(Yuen) variants of those.
Parameters
----------
data : tuple of array_like or DataFrame or Series
Data for k independent samples, with k >= 2.
The data can be provided as a tuple or list of arrays or in long
format with outcome observations in ``data`` and group membership in
``groups``.
groups : ndarray or Series
If data is in long format, then groups is needed as indicator to which
group or sample and observations belongs.
use_var : {"unequal", "equal" or "bf"}
`use_var` specified how to treat heteroscedasticity, unequal variance,
across samples. Three approaches are available
"unequal" : Variances are not assumed to be equal across samples.
Heteroscedasticity is taken into account with Welch Anova and
Satterthwaite-Welch degrees of freedom.
This is the default.
"equal" : Variances are assumed to be equal across samples.
This is the standard Anova.
"bf" : Variances are not assumed to be equal across samples.
The method is Browne-Forsythe (1971) for testing equality of means
with the corrected degrees of freedom by Merothra. The original BF
degrees of freedom are available as additional attributes in the
results instance, ``df_denom2`` and ``p_value2``.
welch_correction : bool
If this is false, then the Welch correction to the test statistic is
not included. This allows the computation of an effect size measure
that corresponds more closely to Cohen's f.
trim_frac : float in [0, 0.5)
Optional trimming for Anova with trimmed mean and winsorized variances.
With the default trim_frac equal to zero, the oneway Anova statistics
are computed without trimming. If `trim_frac` is larger than zero,
then the largest and smallest observations in each sample are trimmed.
The number of trimmed observations is the fraction of number of
observations in the sample truncated to the next lower integer.
`trim_frac` has to be smaller than 0.5, however, if the fraction is
so large that there are not enough observations left over, then `nan`
will be returned.
Returns
-------
res : results instance
The returned HolderTuple instance has the following main attributes
and some additional information in other attributes.
statistic : float
Test statistic for k-sample mean comparison which is approximately
F-distributed.
pvalue : float
If ``use_var="bf"``, then the p-value is based on corrected
degrees of freedom following Mehrotra 1997.
pvalue2 : float
This is the p-value based on degrees of freedom as in
Brown-Forsythe 1974 and is only available if ``use_var="bf"``.
df = (df_denom, df_num) : tuple of floats
Degreeds of freedom for the F-distribution depend on ``use_var``.
If ``use_var="bf"``, then `df_denom` is for Mehrotra p-values
`df_denom2` is available for Brown-Forsythe 1974 p-values.
`df_num` is the same numerator degrees of freedom for both
p-values.
Notes
-----
Welch's anova is correctly sized (not liberal or conservative) in smaller
samples if the distribution of the samples is not very far away from the
normal distribution. The test can become liberal if the data is strongly
skewed. Welch's Anova can also be correctly sized for discrete
distributions with finite support, like Lickert scale data.
The trimmed version is robust to many non-normal distributions, it stays
correctly sized in many cases, and is more powerful in some cases with
skewness or heavy tails.
Trimming is currently based on the integer part of ``nobs * trim_frac``.
The default might change to including fractional observations as in the
original articles by Yuen.
See Also
--------
anova_generic
References
----------
Brown, Morton B., and Alan B. Forsythe. 1974. “The Small Sample Behavior
of Some Statistics Which Test the Equality of Several Means.”
Technometrics 16 (1) (February 1): 129–132. doi:10.2307/1267501.
Mehrotra, Devan V. 1997. “Improving the Brown-Forsythe Solution to the
Generalized Behrens-Fisher Problem.” Communications in Statistics -
Simulation and Computation 26 (3): 1139–1145.
doi:10.1080/03610919708813431.
"""
if groups is not None:
uniques = np.unique(groups)
data = [data[groups == uni] for uni in uniques]
else:
# uniques = None # not used yet, add to info?
pass
args = list(map(np.asarray, data))
if any([x.ndim != 1 for x in args]):
raise ValueError('data arrays have to be one-dimensional')
nobs = np.array([len(x) for x in args], float)
# n_groups = len(args) # not used
# means = np.array([np.mean(x, axis=0) for x in args], float)
# vars_ = np.array([np.var(x, ddof=1, axis=0) for x in args], float)
if trim_frac == 0:
means = np.array([x.mean() for x in args])
vars_ = np.array([x.var(ddof=1) for x in args])
else:
tms = [TrimmedMean(x, trim_frac) for x in args]
means = np.array([tm.mean_trimmed for tm in tms])
# R doesn't use uncorrected var_winsorized
# vars_ = np.array([tm.var_winsorized for tm in tms])
vars_ = np.array([tm.var_winsorized * (tm.nobs - 1) /
(tm.nobs_reduced - 1) for tm in tms])
# nobs_original = nobs # store just in case
nobs = np.array([tm.nobs_reduced for tm in tms])
res = anova_generic(means, vars_, nobs, use_var=use_var,
welch_correction=welch_correction)
return res
[docs]
def equivalence_oneway_generic(f_stat, n_groups, nobs, equiv_margin, df,
alpha=0.05, margin_type="f2"):
"""Equivalence test for oneway anova (Wellek and extensions)
This is an helper function when summary statistics are available.
Use `equivalence_oneway` instead.
The null hypothesis is that the means differ by more than `equiv_margin`
in the anova distance measure.
If the Null is rejected, then the data supports that means are equivalent,
i.e. within a given distance.
Parameters
----------
f_stat : float
F-statistic
n_groups : int
Number of groups in oneway comparison.
nobs : ndarray
Array of number of observations in groups.
equiv_margin : float
Equivalence margin in terms of effect size. Effect size can be chosen
with `margin_type`. default is squared Cohen's f.
df : tuple
degrees of freedom ``df = (df1, df2)`` where
- df1 : numerator degrees of freedom, number of constraints
- df2 : denominator degrees of freedom, df_resid
alpha : float in (0, 1)
Significance level for the hypothesis test.
margin_type : "f2" or "wellek"
Type of effect size used for equivalence margin.
Returns
-------
results : instance of HolderTuple class
The two main attributes are test statistic `statistic` and p-value
`pvalue`.
Notes
-----
Equivalence in this function is defined in terms of a squared distance
measure similar to Mahalanobis distance.
Alternative definitions for the oneway case are based on maximum difference
between pairs of means or similar pairwise distances.
The equivalence margin is used for the noncentrality parameter in the
noncentral F distribution for the test statistic. In samples with unequal
variances estimated using Welch or Brown-Forsythe Anova, the f-statistic
depends on the unequal variances and corrections to the test statistic.
This means that the equivalence margins are not fully comparable across
methods for treating unequal variances.
References
----------
Wellek, Stefan. 2010. Testing Statistical Hypotheses of Equivalence and
Noninferiority. 2nd ed. Boca Raton: CRC Press.
Cribbie, Robert A., Chantal A. Arpin-Cribbie, and Jamie A. Gruman. 2009.
“Tests of Equivalence for One-Way Independent Groups Designs.” The Journal
of Experimental Education 78 (1): 1–13.
https://doi.org/10.1080/00220970903224552.
Jan, Show-Li, and Gwowen Shieh. 2019. “On the Extended Welch Test for
Assessing Equivalence of Standardized Means.” Statistics in
Biopharmaceutical Research 0 (0): 1–8.
https://doi.org/10.1080/19466315.2019.1654915.
"""
nobs_t = nobs.sum()
nobs_mean = nobs_t / n_groups
if margin_type == "wellek":
nc_null = nobs_mean * equiv_margin**2
es = f_stat * (n_groups - 1) / nobs_mean
type_effectsize = "Wellek's psi_squared"
elif margin_type in ["f2", "fsqu", "fsquared"]:
nc_null = nobs_t * equiv_margin
es = f_stat / nobs_t
type_effectsize = "Cohen's f_squared"
else:
raise ValueError('`margin_type` should be "f2" or "wellek"')
crit_f = ncf_ppf(alpha, df[0], df[1], nc_null)
if margin_type == "wellek":
# TODO: do we need a sqrt
crit_es = crit_f * (n_groups - 1) / nobs_mean
elif margin_type in ["f2", "fsqu", "fsquared"]:
crit_es = crit_f / nobs_t
reject = (es < crit_es)
pv = ncf_cdf(f_stat, df[0], df[1], nc_null)
pwr = ncf_cdf(crit_f, df[0], df[1], 1e-13) # scipy, cannot be 0
res = HolderTuple(statistic=f_stat,
pvalue=pv,
effectsize=es, # match es type to margin_type
crit_f=crit_f,
crit_es=crit_es,
reject=reject,
power_zero=pwr,
df=df,
f_stat=f_stat,
type_effectsize=type_effectsize
)
return res
[docs]
def equivalence_oneway(data, equiv_margin, groups=None, use_var="unequal",
welch_correction=True, trim_frac=0, margin_type="f2"):
"""equivalence test for oneway anova (Wellek's Anova)
The null hypothesis is that the means differ by more than `equiv_margin`
in the anova distance measure.
If the Null is rejected, then the data supports that means are equivalent,
i.e. within a given distance.
Parameters
----------
data : tuple of array_like or DataFrame or Series
Data for k independent samples, with k >= 2.
The data can be provided as a tuple or list of arrays or in long
format with outcome observations in ``data`` and group membership in
``groups``.
equiv_margin : float
Equivalence margin in terms of effect size. Effect size can be chosen
with `margin_type`. default is squared Cohen's f.
groups : ndarray or Series
If data is in long format, then groups is needed as indicator to which
group or sample and observations belongs.
use_var : {"unequal", "equal" or "bf"}
`use_var` specified how to treat heteroscedasticity, unequal variance,
across samples. Three approaches are available
"unequal" : Variances are not assumed to be equal across samples.
Heteroscedasticity is taken into account with Welch Anova and
Satterthwaite-Welch degrees of freedom.
This is the default.
"equal" : Variances are assumed to be equal across samples.
This is the standard Anova.
"bf: Variances are not assumed to be equal across samples.
The method is Browne-Forsythe (1971) for testing equality of means
with the corrected degrees of freedom by Merothra. The original BF
degrees of freedom are available as additional attributes in the
results instance, ``df_denom2`` and ``p_value2``.
welch_correction : bool
If this is false, then the Welch correction to the test statistic is
not included. This allows the computation of an effect size measure
that corresponds more closely to Cohen's f.
trim_frac : float in [0, 0.5)
Optional trimming for Anova with trimmed mean and winsorized variances.
With the default trim_frac equal to zero, the oneway Anova statistics
are computed without trimming. If `trim_frac` is larger than zero,
then the largest and smallest observations in each sample are trimmed.
The number of trimmed observations is the fraction of number of
observations in the sample truncated to the next lower integer.
`trim_frac` has to be smaller than 0.5, however, if the fraction is
so large that there are not enough observations left over, then `nan`
will be returned.
margin_type : "f2" or "wellek"
Type of effect size used for equivalence margin, either squared
Cohen's f or Wellek's psi. Default is "f2".
Returns
-------
results : instance of HolderTuple class
The two main attributes are test statistic `statistic` and p-value
`pvalue`.
See Also
--------
anova_oneway
equivalence_scale_oneway
"""
# use anova to compute summary statistics and f-statistic
res0 = anova_oneway(data, groups=groups, use_var=use_var,
welch_correction=welch_correction,
trim_frac=trim_frac)
f_stat = res0.statistic
res = equivalence_oneway_generic(f_stat, res0.n_groups, res0.nobs_t,
equiv_margin, res0.df, alpha=0.05,
margin_type=margin_type)
return res
[docs]
def _power_equivalence_oneway_emp(f_stat, n_groups, nobs, eps, df, alpha=0.05):
"""Empirical power of oneway equivalence test
This only returns post-hoc, empirical power.
Warning: eps is currently effect size margin as defined as in Wellek, and
not the signal to noise ratio (Cohen's f family).
Parameters
----------
f_stat : float
F-statistic from oneway anova, used to compute empirical effect size
n_groups : int
Number of groups in oneway comparison.
nobs : ndarray
Array of number of observations in groups.
eps : float
Equivalence margin in terms of effect size given by Wellek's psi.
df : tuple
Degrees of freedom for F distribution.
alpha : float in (0, 1)
Significance level for the hypothesis test.
Returns
-------
pow : float
Ex-post, post-hoc or empirical power at f-statistic of the equivalence
test.
"""
res = equivalence_oneway_generic(f_stat, n_groups, nobs, eps, df,
alpha=alpha, margin_type="wellek")
nobs_mean = nobs.sum() / n_groups
fn = f_stat # post-hoc power, empirical power at estimate
esn = fn * (n_groups - 1) / nobs_mean # Wellek psi
pow_ = ncf_cdf(res.crit_f, df[0], df[1], nobs_mean * esn)
return pow_
[docs]
def power_equivalence_oneway(f2_alt, equiv_margin, nobs_t, n_groups=None,
df=None, alpha=0.05, margin_type="f2"):
"""
Power of oneway equivalence test
Parameters
----------
f2_alt : float
Effect size, squared Cohen's f, under the alternative.
equiv_margin : float
Equivalence margin in terms of effect size. Effect size can be chosen
with `margin_type`. default is squared Cohen's f.
nobs_t : ndarray
Total number of observations summed over all groups.
n_groups : int
Number of groups in oneway comparison. If margin_type is "wellek",
then either ``n_groups`` or ``df`` has to be given.
df : tuple
Degrees of freedom for F distribution,
``df = (n_groups - 1, nobs_t - n_groups)``
alpha : float in (0, 1)
Significance level for the hypothesis test.
margin_type : "f2" or "wellek"
Type of effect size used for equivalence margin, either squared
Cohen's f or Wellek's psi. Default is "f2".
Returns
-------
pow_alt : float
Power of the equivalence test at given equivalence effect size under
the alternative.
"""
# one of n_groups or df has to be specified
if df is None:
if n_groups is None:
raise ValueError("either df or n_groups has to be provided")
df = (n_groups - 1, nobs_t - n_groups)
# esn = fn * (n_groups - 1) / nobs_mean # Wellek psi
# fix for scipy, ncf does not allow nc == 0, fixed in scipy master
if f2_alt == 0:
f2_alt = 1e-13
# effect size, critical value at margin
# f2_null = equiv_margin
if margin_type in ["f2", "fsqu", "fsquared"]:
f2_null = equiv_margin
elif margin_type == "wellek":
if n_groups is None:
raise ValueError("If margin_type is wellek, then n_groups has "
"to be provided")
# f2_null = (n_groups - 1) * n_groups / nobs_t * equiv_margin**2
nobs_mean = nobs_t / n_groups
f2_null = nobs_mean * equiv_margin**2 / nobs_t
f2_alt = nobs_mean * f2_alt**2 / nobs_t
else:
raise ValueError('`margin_type` should be "f2" or "wellek"')
crit_f_margin = ncf_ppf(alpha, df[0], df[1], nobs_t * f2_null)
pwr_alt = ncf_cdf(crit_f_margin, df[0], df[1], nobs_t * f2_alt)
return pwr_alt
[docs]
def simulate_power_equivalence_oneway(means, nobs, equiv_margin, vars_=None,
k_mc=1000, trim_frac=0,
options_var=None, margin_type="f2"
): # , anova_options=None): #TODO
"""Simulate Power for oneway equivalence test (Wellek's Anova)
This function is experimental and written to evaluate asymptotic power
function. This function will change without backwards compatibility
constraints. The only part that is stable is `pvalue` attribute in results.
Effect size for equivalence margin
"""
if options_var is None:
options_var = ["unequal", "equal", "bf"]
if vars_ is not None:
stds = np.sqrt(vars_)
else:
stds = np.ones(len(means))
nobs_mean = nobs.mean()
n_groups = len(nobs)
res_mc = []
f_mc = []
reject_mc = []
other_mc = []
for _ in range(k_mc):
y0, y1, y2, y3 = (m + std * np.random.randn(n)
for (n, m, std) in zip(nobs, means, stds))
res_i = []
f_i = []
reject_i = []
other_i = []
for uv in options_var:
# for welch in options_welch:
# res1 = sma.anova_generic(means, vars_, nobs, use_var=uv,
# welch_correction=welch)
res0 = anova_oneway([y0, y1, y2, y3], use_var=uv,
trim_frac=trim_frac)
f_stat = res0.statistic
res1 = equivalence_oneway_generic(f_stat, n_groups, nobs.sum(),
equiv_margin, res0.df,
alpha=0.05,
margin_type=margin_type)
res_i.append(res1.pvalue)
es_wellek = f_stat * (n_groups - 1) / nobs_mean
f_i.append(es_wellek)
reject_i.append(res1.reject)
other_i.extend([res1.crit_f, res1.crit_es, res1.power_zero])
res_mc.append(res_i)
f_mc.append(f_i)
reject_mc.append(reject_i)
other_mc.append(other_i)
f_mc = np.asarray(f_mc)
other_mc = np.asarray(other_mc)
res_mc = np.asarray(res_mc)
reject_mc = np.asarray(reject_mc)
res = Holder(f_stat=f_mc,
other=other_mc,
pvalue=res_mc,
reject=reject_mc
)
return res
[docs]
def test_scale_oneway(data, method="bf", center="median", transform="abs",
trim_frac_mean=0.1, trim_frac_anova=0.0):
"""Oneway Anova test for equal scale, variance or dispersion
This hypothesis test performs a oneway anova test on transformed data and
includes Levene and Brown-Forsythe tests for equal variances as special
cases.
Parameters
----------
data : tuple of array_like or DataFrame or Series
Data for k independent samples, with k >= 2. The data can be provided
as a tuple or list of arrays or in long format with outcome
observations in ``data`` and group membership in ``groups``.
method : {"unequal", "equal" or "bf"}
How to treat heteroscedasticity across samples. This is used as
`use_var` option in `anova_oneway` and refers to the variance of the
transformed data, i.e. assumption is on 4th moment if squares are used
as transform.
Three approaches are available:
"unequal" : Variances are not assumed to be equal across samples.
Heteroscedasticity is taken into account with Welch Anova and
Satterthwaite-Welch degrees of freedom.
This is the default.
"equal" : Variances are assumed to be equal across samples.
This is the standard Anova.
"bf" : Variances are not assumed to be equal across samples.
The method is Browne-Forsythe (1971) for testing equality of means
with the corrected degrees of freedom by Merothra. The original BF
degrees of freedom are available as additional attributes in the
results instance, ``df_denom2`` and ``p_value2``.
center : "median", "mean", "trimmed" or float
Statistic used for centering observations. If a float, then this
value is used to center. Default is median.
transform : "abs", "square" or callable
Transformation for the centered observations. If a callable, then this
function is called on the centered data.
Default is absolute value.
trim_frac_mean=0.1 : float in [0, 0.5)
Trim fraction for the trimmed mean when `center` is "trimmed"
trim_frac_anova : float in [0, 0.5)
Optional trimming for Anova with trimmed mean and Winsorized variances.
With the default trim_frac equal to zero, the oneway Anova statistics
are computed without trimming. If `trim_frac` is larger than zero,
then the largest and smallest observations in each sample are trimmed.
see ``trim_frac`` option in `anova_oneway`
Returns
-------
res : results instance
The returned HolderTuple instance has the following main attributes
and some additional information in other attributes.
statistic : float
Test statistic for k-sample mean comparison which is approximately
F-distributed.
pvalue : float
If ``method="bf"``, then the p-value is based on corrected
degrees of freedom following Mehrotra 1997.
pvalue2 : float
This is the p-value based on degrees of freedom as in
Brown-Forsythe 1974 and is only available if ``method="bf"``.
df : (df_denom, df_num)
Tuple containing degrees of freedom for the F-distribution depend
on ``method``. If ``method="bf"``, then `df_denom` is for Mehrotra
p-values `df_denom2` is available for Brown-Forsythe 1974 p-values.
`df_num` is the same numerator degrees of freedom for both
p-values.
See Also
--------
anova_oneway
scale_transform
"""
data = map(np.asarray, data)
xxd = [scale_transform(x, center=center, transform=transform,
trim_frac=trim_frac_mean) for x in data]
res = anova_oneway(xxd, groups=None, use_var=method,
welch_correction=True, trim_frac=trim_frac_anova)
res.data_transformed = xxd
return res
[docs]
def equivalence_scale_oneway(data, equiv_margin, method='bf', center='median',
transform='abs', trim_frac_mean=0.,
trim_frac_anova=0.):
"""Oneway Anova test for equivalence of scale, variance or dispersion
This hypothesis test performs a oneway equivalence anova test on
transformed data.
Note, the interpretation of the equivalence margin `equiv_margin` will
depend on the transformation of the data. Transformations like
absolute deviation are not scaled to correspond to the variance under
normal distribution.
Parameters
----------
data : tuple of array_like or DataFrame or Series
Data for k independent samples, with k >= 2. The data can be provided
as a tuple or list of arrays or in long format with outcome
observations in ``data`` and group membership in ``groups``.
equiv_margin : float
Equivalence margin in terms of effect size. Effect size can be chosen
with `margin_type`. default is squared Cohen's f.
method : {"unequal", "equal" or "bf"}
How to treat heteroscedasticity across samples. This is used as
`use_var` option in `anova_oneway` and refers to the variance of the
transformed data, i.e. assumption is on 4th moment if squares are used
as transform.
Three approaches are available:
"unequal" : Variances are not assumed to be equal across samples.
Heteroscedasticity is taken into account with Welch Anova and
Satterthwaite-Welch degrees of freedom.
This is the default.
"equal" : Variances are assumed to be equal across samples.
This is the standard Anova.
"bf" : Variances are not assumed to be equal across samples.
The method is Browne-Forsythe (1971) for testing equality of means
with the corrected degrees of freedom by Merothra. The original BF
degrees of freedom are available as additional attributes in the
results instance, ``df_denom2`` and ``p_value2``.
center : "median", "mean", "trimmed" or float
Statistic used for centering observations. If a float, then this
value is used to center. Default is median.
transform : "abs", "square" or callable
Transformation for the centered observations. If a callable, then this
function is called on the centered data.
Default is absolute value.
trim_frac_mean : float in [0, 0.5)
Trim fraction for the trimmed mean when `center` is "trimmed"
trim_frac_anova : float in [0, 0.5)
Optional trimming for Anova with trimmed mean and Winsorized variances.
With the default trim_frac equal to zero, the oneway Anova statistics
are computed without trimming. If `trim_frac` is larger than zero,
then the largest and smallest observations in each sample are trimmed.
see ``trim_frac`` option in `anova_oneway`
Returns
-------
results : instance of HolderTuple class
The two main attributes are test statistic `statistic` and p-value
`pvalue`.
See Also
--------
anova_oneway
scale_transform
equivalence_oneway
"""
data = map(np.asarray, data)
xxd = [scale_transform(x, center=center, transform=transform,
trim_frac=trim_frac_mean) for x in data]
res = equivalence_oneway(xxd, equiv_margin, use_var=method,
welch_correction=True, trim_frac=trim_frac_anova)
res.x_transformed = xxd
return res
Last update:
Oct 29, 2024