Source code for statsmodels.stats.oneway

"""
Created on Wed Mar 18 10:33:38 2020

Author: Josef Perktold
License: BSD-3

"""

import numpy as np
from scipy import stats
from scipy.special import ncfdtrinc

# functions that use scipy.special instead of boost based function in stats
from statsmodels.stats.power import ncf_cdf, ncf_ppf

from statsmodels.stats.robust_compare import TrimmedMean, scale_transform
from statsmodels.tools.testing import Holder
from statsmodels.stats.base import HolderTuple



[docs]
def effectsize_oneway(means, vars_, nobs, use_var="unequal", ddof_between=0):
    """
    Effect size corresponding to Cohen's f = nc / nobs for oneway anova

    This contains adjustment for Welch and Brown-Forsythe Anova so that
    effect size can be used with FTestAnovaPower.

    Parameters
    ----------
    means : array_like
        Mean of samples to be compared
    vars_ : float or array_like
        Residual (within) variance of each sample or pooled
        If ``vars_`` is scalar, then it is interpreted as pooled variance that
        is the same for all samples, ``use_var`` will be ignored.
        Otherwise, the variances are used depending on the ``use_var`` keyword.
    nobs : int or array_like
        Number of observations for the samples.
        If nobs is scalar, then it is assumed that all samples have the same
        number ``nobs`` of observation, i.e. a balanced sample case.
        Otherwise, statistics will be weighted corresponding to nobs.
        Only relative sizes are relevant, any proportional change to nobs does
        not change the effect size.
    use_var : {"unequal", "equal", "bf"}
        If ``use_var`` is "unequal", then the variances can differ across
        samples and the effect size for Welch anova will be computed.
    ddof_between : int
        Degrees of freedom correction for the weighted between sum of squares.
        The denominator is ``nobs_total - ddof_between``
        This can be used to match differences across reference literature.

    Returns
    -------
    f2 : float
        Effect size corresponding to squared Cohen's f, which is also equal
        to the noncentrality divided by total number of observations.

    Notes
    -----
    This currently handles the following cases for oneway anova

    - balanced sample with homoscedastic variances
    - samples with different number of observations and with homoscedastic
      variances
    - samples with different number of observations and with heteroskedastic
      variances. This corresponds to Welch anova

    In the case of "unequal" and "bf" methods for unequal variances, the
    effect sizes do not directly correspond to the test statistic in Anova.
    Both have correction terms dropped or added, so the effect sizes match up
    with using FTestAnovaPower.
    If all variances are equal, then all three methods result in the same
    effect size. If variances are unequal, then the three methods produce
    small differences in effect size.

    Note, the effect size and power computation for BF Anova was not found in
    the literature. The correction terms were added so that FTestAnovaPower
    provides a good approximation to the power.

    Status: experimental
    We might add additional returns, if those are needed to support power
    and sample size applications.

    Examples
    --------
    The following shows how to compute effect size and power for each of the
    three anova methods. The null hypothesis is that the means are equal which
    corresponds to a zero effect size. Under the alternative, means differ
    with two sample means at a distance delta from the mean. We assume the
    variance is the same under the null and alternative hypothesis.

    ``nobs`` for the samples defines the fraction of observations in the
    samples. ``nobs`` in the power method defines the total sample size.

    In simulations, the computed power for standard anova,
    i.e.``use_var="equal"`` overestimates the simulated power by a few percent.
    The equal variance assumption does not hold in this example.

    >>> from statsmodels.stats.oneway import effectsize_oneway
    >>> from statsmodels.stats.power import FTestAnovaPower
    >>>
    >>> nobs = np.array([10, 12, 13, 15])
    >>> delta = 0.5
    >>> means_alt = np.array([-1, 0, 0, 1]) * delta
    >>> vars_ = np.arange(1, len(means_alt) + 1)
    >>>
    >>> f2_alt = effectsize_oneway(means_alt, vars_, nobs, use_var="equal")
    >>> f2_alt
    0.04581300813008131
    >>>
    >>> kwds = {'effect_size': np.sqrt(f2_alt), 'nobs': 100, 'alpha': 0.05,
    ...         'k_groups': 4}
    >>> power = FTestAnovaPower().power(**kwds)
    >>> power
    0.39165892158983273
    >>>
    >>> f2_alt = effectsize_oneway(means_alt, vars_, nobs, use_var="unequal")
    >>> f2_alt
    0.060640138408304504
    >>>
    >>> kwds['effect_size'] = np.sqrt(f2_alt)
    >>> power = FTestAnovaPower().power(**kwds)
    >>> power
    0.5047366512800622
    >>>
    >>> f2_alt = effectsize_oneway(means_alt, vars_, nobs, use_var="bf")
    >>> f2_alt
    0.04391324307956788
    >>>
    >>> kwds['effect_size'] = np.sqrt(f2_alt)
    >>> power = FTestAnovaPower().power(**kwds)
    >>> power
    0.3765792117047725

    """
    # the code here is largely a copy of onway_generic with adjustments

    means = np.asarray(means)
    n_groups = means.shape[0]

    if np.size(nobs) == 1:
        nobs = np.ones(n_groups) * nobs

    nobs_t = nobs.sum()

    if use_var == "equal":
        if np.size(vars_) == 1:
            var_resid = vars_
        else:
            vars_ = np.asarray(vars_)
            var_resid = ((nobs - 1) * vars_).sum() / (nobs_t - n_groups)

        vars_ = var_resid  # scalar, if broadcasting works

    weights = nobs / vars_

    w_total = weights.sum()
    w_rel = weights / w_total
    # meanw_t = (weights * means).sum() / w_total
    meanw_t = w_rel @ means

    f2 = np.dot(weights, (means - meanw_t)**2) / (nobs_t - ddof_between)

    if use_var.lower() == "bf":
        weights = nobs
        w_total = weights.sum()
        w_rel = weights / w_total
        meanw_t = w_rel @ means
        # TODO: reuse general case with weights
        tmp = ((1. - nobs / nobs_t) * vars_).sum()
        statistic = 1. * (nobs * (means - meanw_t)**2).sum()
        statistic /= tmp
        f2 = statistic * (1. - nobs / nobs_t).sum() / nobs_t
        # correction factor for df_num in BFM
        df_num2 = n_groups - 1
        df_num = tmp**2 / ((vars_**2).sum() +
                           (nobs / nobs_t * vars_).sum()**2 -
                           2 * (nobs / nobs_t * vars_**2).sum())
        f2 *= df_num / df_num2

    return f2




[docs]
def convert_effectsize_fsqu(f2=None, eta2=None):
    """Convert squared effect sizes in f family

    f2 is signal to noise ratio, var_explained / var_residual

    eta2 is proportion of explained variance, var_explained / var_total

    uses the relationship:
    f2 = eta2 / (1 - eta2)

    Parameters
    ----------
    f2 : None or float
       Squared Cohen's F effect size. If f2 is not None, then eta2 will be
       computed.
    eta2 : None or float
       Squared eta effect size. If f2 is None and eta2 is not None, then f2 is
       computed.

    Returns
    -------
    res : Holder instance
        An instance of the Holder class with f2 and eta2 as attributes.

    """
    if f2 is not None:
        eta2 = 1 / (1 + 1 / f2)

    elif eta2 is not None:
        f2 = eta2 / (1 - eta2)

    res = Holder(f2=f2, eta2=eta2)
    return res




[docs]
def _fstat2effectsize(f_stat, df):
    """Compute anova effect size from F-statistic

    This might be combined with convert_effectsize_fsqu

    Parameters
    ----------
    f_stat : array_like
        Test statistic of an F-test
    df : tuple
        degrees of freedom ``df = (df1, df2)`` where
         - df1 : numerator degrees of freedom, number of constraints
         - df2 : denominator degrees of freedom, df_resid

    Returns
    -------
    res : Holder instance
        This instance contains effect size measures f2, eta2, omega2 and eps2
        as attributes.

    Notes
    -----
    This uses the following definitions:

    - f2 = f_stat * df1 / df2
    - eta2 = f2 / (f2 + 1)
    - omega2 = (f2 - df1 / df2) / (f2 + 2)
    - eps2 = (f2 - df1 / df2) / (f2 + 1)

    This differs from effect size measures in other function which define
    ``f2 = f_stat * df1 / nobs``
    or an equivalent expression for power computation. The noncentrality
    index for the hypothesis test is in those cases given by
    ``nc = f_stat * df1``.

    Currently omega2 and eps2 are computed in two different ways. Those
    values agree for regular cases but can show different behavior in corner
    cases (e.g. zero division).

    """
    df1, df2 = df
    f2 = f_stat * df1 / df2
    eta2 = f2 / (f2 + 1)
    omega2_ = (f_stat - 1) / (f_stat + (df2 + 1) / df1)
    omega2 = (f2 - df1 / df2) / (f2 + 1 + 1 / df2)  # rewrite
    eps2_ = (f_stat - 1) / (f_stat + df2 / df1)
    eps2 = (f2 - df1 / df2) / (f2 + 1)  # rewrite
    return Holder(f2=f2, eta2=eta2, omega2=omega2, eps2=eps2, eps2_=eps2_,
                  omega2_=omega2_)



# conversion functions for Wellek's equivalence effect size
# these are mainly to compare with literature


[docs]
def wellek_to_f2(eps, n_groups):
    """Convert Wellek's effect size (sqrt) to Cohen's f-squared

    This computes the following effect size :

       f2 = 1 / n_groups * eps**2

    Parameters
    ----------
    eps : float or ndarray
        Wellek's effect size used in anova equivalence test
    n_groups : int
        Number of groups in oneway comparison

    Returns
    -------
    f2 : effect size Cohen's f-squared

    """
    f2 = 1 / n_groups * eps**2
    return f2




[docs]
def f2_to_wellek(f2, n_groups):
    """Convert Cohen's f-squared to Wellek's effect size (sqrt)

    This computes the following effect size :

       eps = sqrt(n_groups * f2)

    Parameters
    ----------
    f2 : float or ndarray
        Effect size Cohen's f-squared
    n_groups : int
        Number of groups in oneway comparison

    Returns
    -------
    eps : float or ndarray
        Wellek's effect size used in anova equivalence test
    """
    eps = np.sqrt(n_groups * f2)
    return eps




[docs]
def fstat_to_wellek(f_stat, n_groups, nobs_mean):
    """Convert F statistic to wellek's effect size eps squared

    This computes the following effect size :

       es = f_stat * (n_groups - 1) / nobs_mean

    Parameters
    ----------
    f_stat : float or ndarray
        Test statistic of an F-test.
    n_groups : int
        Number of groups in oneway comparison
    nobs_mean : float or ndarray
        Average number of observations across groups.

    Returns
    -------
    eps : float or ndarray
        Wellek's effect size used in anova equivalence test

    """
    es = f_stat * (n_groups - 1) / nobs_mean
    return es




[docs]
def confint_noncentrality(f_stat, df, alpha=0.05,
                          alternative="two-sided"):
    """
    Confidence interval for noncentrality parameter in F-test

    This does not yet handle non-negativity constraint on nc.
    Currently only two-sided alternative is supported.

    Parameters
    ----------
    f_stat : float
    df : tuple
        degrees of freedom ``df = (df1, df2)`` where

        - df1 : numerator degrees of freedom, number of constraints
        - df2 : denominator degrees of freedom, df_resid

    alpha : float, default 0.05
    alternative : {"two-sided"}
        Other alternatives have not been implements.

    Returns
    -------
    float
        The end point of the confidence interval.

    Notes
    -----
    The algorithm inverts the cdf of the noncentral F distribution with
    respect to the noncentrality parameters.
    See Steiger 2004 and references cited in it.

    References
    ----------
    .. [1] Steiger, James H. 2004. “Beyond the F Test: Effect Size Confidence
       Intervals and Tests of Close Fit in the Analysis of Variance and
       Contrast Analysis.” Psychological Methods 9 (2): 164–82.
       https://doi.org/10.1037/1082-989X.9.2.164.

    See Also
    --------
    confint_effectsize_oneway
    """

    df1, df2 = df
    if alternative in ["two-sided", "2s", "ts"]:
        alpha1s = alpha / 2
        ci = ncfdtrinc(df1, df2, [1 - alpha1s, alpha1s], f_stat)
    else:
        raise NotImplementedError

    return ci




[docs]
def confint_effectsize_oneway(f_stat, df, alpha=0.05, nobs=None):
    """
    Confidence interval for effect size in oneway anova for F distribution

    This does not yet handle non-negativity constraint on nc.
    Currently only two-sided alternative is supported.

    Parameters
    ----------
    f_stat : float
    df : tuple
        degrees of freedom ``df = (df1, df2)`` where

        - df1 : numerator degrees of freedom, number of constraints
        - df2 : denominator degrees of freedom, df_resid

    alpha : float, default 0.05
    nobs : int, default None

    Returns
    -------
    Holder
        Class with effect size and confidence attributes

    Notes
    -----
    The confidence interval for the noncentrality parameter is obtained by
    inverting the cdf of the noncentral F distribution. Confidence intervals
    for other effect sizes are computed by endpoint transformation.


    R package ``effectsize`` does not compute the confidence intervals in the
    same way. Their confidence intervals can be replicated with

    >>> ci_nc = confint_noncentrality(f_stat, df1, df2, alpha=0.1)
    >>> ci_es = smo._fstat2effectsize(ci_nc / df1, df1, df2)

    See Also
    --------
    confint_noncentrality
    """

    df1, df2 = df
    if nobs is None:
        nobs = df1 + df2 + 1
    ci_nc = confint_noncentrality(f_stat, df, alpha=alpha)

    ci_f2 = ci_nc / nobs
    ci_res = convert_effectsize_fsqu(f2=ci_f2)
    ci_res.ci_omega2 = (ci_f2 - df1 / df2) / (ci_f2 + 1 + 1 / df2)
    ci_res.ci_nc = ci_nc
    ci_res.ci_f = np.sqrt(ci_res.f2)
    ci_res.ci_eta = np.sqrt(ci_res.eta2)
    ci_res.ci_f_corrected = np.sqrt(ci_res.f2 * (df1 + 1) / df1)

    return ci_res




[docs]
def anova_generic(means, variances, nobs, use_var="unequal",
                  welch_correction=True, info=None):
    """
    Oneway Anova based on summary statistics

    Parameters
    ----------
    means : array_like
        Mean of samples to be compared
    variances : float or array_like
        Residual (within) variance of each sample or pooled.
        If ``variances`` is scalar, then it is interpreted as pooled variance
        that is the same for all samples, ``use_var`` will be ignored.
        Otherwise, the variances are used depending on the ``use_var`` keyword.
    nobs : int or array_like
        Number of observations for the samples.
        If nobs is scalar, then it is assumed that all samples have the same
        number ``nobs`` of observation, i.e. a balanced sample case.
        Otherwise, statistics will be weighted corresponding to nobs.
        Only relative sizes are relevant, any proportional change to nobs does
        not change the effect size.
    use_var : {"unequal", "equal", "bf"}
        If ``use_var`` is "unequal", then the variances can differ across
        samples and the effect size for Welch anova will be computed.
    welch_correction : bool
        If this is false, then the Welch correction to the test statistic is
        not included. This allows the computation of an effect size measure
        that corresponds more closely to Cohen's f.
    info : not used yet

    Returns
    -------
    res : results instance
        This includes `statistic` and `pvalue`.

    """
    options = {"use_var": use_var,
               "welch_correction": welch_correction
               }
    if means.ndim != 1:
        raise ValueError('data (means, ...) has to be one-dimensional')
    nobs_t = nobs.sum()
    n_groups = len(means)
    # mean_t = (nobs * means).sum() / nobs_t
    if use_var == "unequal":
        weights = nobs / variances
    else:
        weights = nobs

    w_total = weights.sum()
    w_rel = weights / w_total
    # meanw_t = (weights * means).sum() / w_total
    meanw_t = w_rel @ means

    statistic = np.dot(weights, (means - meanw_t)**2) / (n_groups - 1.)
    df_num = n_groups - 1.

    if use_var == "unequal":
        tmp = ((1 - w_rel)**2 / (nobs - 1)).sum() / (n_groups**2 - 1)
        if welch_correction:
            statistic /= 1 + 2 * (n_groups - 2) * tmp
        df_denom = 1. / (3. * tmp)

    elif use_var == "equal":
        # variance of group demeaned total sample, pooled var_resid
        tmp = ((nobs - 1) * variances).sum() / (nobs_t - n_groups)
        statistic /= tmp
        df_denom = nobs_t - n_groups

    elif use_var == "bf":
        tmp = ((1. - nobs / nobs_t) * variances).sum()
        statistic = 1. * (nobs * (means - meanw_t)**2).sum()
        statistic /= tmp

        df_num2 = n_groups - 1
        df_denom = tmp**2 / ((1. - nobs / nobs_t) ** 2 *
                             variances ** 2 / (nobs - 1)).sum()
        df_num = tmp**2 / ((variances ** 2).sum() +
                           (nobs / nobs_t * variances).sum() ** 2 -
                           2 * (nobs / nobs_t * variances ** 2).sum())
        pval2 = stats.f.sf(statistic, df_num2, df_denom)
        options["df2"] = (df_num2, df_denom)
        options["df_num2"] = df_num2
        options["pvalue2"] = pval2

    else:
        raise ValueError('use_var is to be one of "unequal", "equal" or "bf"')

    pval = stats.f.sf(statistic, df_num, df_denom)
    res = HolderTuple(statistic=statistic,
                      pvalue=pval,
                      df=(df_num, df_denom),
                      df_num=df_num,
                      df_denom=df_denom,
                      nobs_t=nobs_t,
                      n_groups=n_groups,
                      means=means,
                      nobs=nobs,
                      vars_=variances,
                      **options
                      )
    return res




[docs]
def anova_oneway(data, groups=None, use_var="unequal", welch_correction=True,
                 trim_frac=0):
    """Oneway Anova

    This implements standard anova, Welch and Brown-Forsythe, and trimmed
    (Yuen) variants of those.

    Parameters
    ----------
    data : tuple of array_like or DataFrame or Series
        Data for k independent samples, with k >= 2.
        The data can be provided as a tuple or list of arrays or in long
        format with outcome observations in ``data`` and group membership in
        ``groups``.
    groups : ndarray or Series
        If data is in long format, then groups is needed as indicator to which
        group or sample and observations belongs.
    use_var : {"unequal", "equal" or "bf"}
        `use_var` specified how to treat heteroscedasticity, unequal variance,
        across samples. Three approaches are available

        "unequal" : Variances are not assumed to be equal across samples.
            Heteroscedasticity is taken into account with Welch Anova and
            Satterthwaite-Welch degrees of freedom.
            This is the default.
        "equal" : Variances are assumed to be equal across samples.
            This is the standard Anova.
        "bf" : Variances are not assumed to be equal across samples.
            The method is Browne-Forsythe (1971) for testing equality of means
            with the corrected degrees of freedom by Merothra. The original BF
            degrees of freedom are available as additional attributes in the
            results instance, ``df_denom2`` and ``p_value2``.

    welch_correction : bool
        If this is false, then the Welch correction to the test statistic is
        not included. This allows the computation of an effect size measure
        that corresponds more closely to Cohen's f.
    trim_frac : float in [0, 0.5)
        Optional trimming for Anova with trimmed mean and winsorized variances.
        With the default trim_frac equal to zero, the oneway Anova statistics
        are computed without trimming. If `trim_frac` is larger than zero,
        then the largest and smallest observations in each sample are trimmed.
        The number of trimmed observations is the fraction of number of
        observations in the sample truncated to the next lower integer.
        `trim_frac` has to be smaller than 0.5, however, if the fraction is
        so large that there are not enough observations left over, then `nan`
        will be returned.

    Returns
    -------
    res : results instance
        The returned HolderTuple instance has the following main attributes
        and some additional information in other attributes.

        statistic : float
            Test statistic for k-sample mean comparison which is approximately
            F-distributed.
        pvalue : float
            If ``use_var="bf"``, then the p-value is based on corrected
            degrees of freedom following Mehrotra 1997.
        pvalue2 : float
            This is the p-value based on degrees of freedom as in
            Brown-Forsythe 1974 and is only available if ``use_var="bf"``.
        df = (df_denom, df_num) : tuple of floats
            Degreeds of freedom for the F-distribution depend on ``use_var``.
            If ``use_var="bf"``, then `df_denom` is for Mehrotra p-values
            `df_denom2` is available for Brown-Forsythe 1974 p-values.
            `df_num` is the same numerator degrees of freedom for both
            p-values.

    Notes
    -----
    Welch's anova is correctly sized (not liberal or conservative) in smaller
    samples if the distribution of the samples is not very far away from the
    normal distribution. The test can become liberal if the data is strongly
    skewed. Welch's Anova can also be correctly sized for discrete
    distributions with finite support, like Lickert scale data.
    The trimmed version is robust to many non-normal distributions, it stays
    correctly sized in many cases, and is more powerful in some cases with
    skewness or heavy tails.

    Trimming is currently based on the integer part of ``nobs * trim_frac``.
    The default might change to including fractional observations as in the
    original articles by Yuen.


    See Also
    --------
    anova_generic

    References
    ----------
    Brown, Morton B., and Alan B. Forsythe. 1974. “The Small Sample Behavior
    of Some Statistics Which Test the Equality of Several Means.”
    Technometrics 16 (1) (February 1): 129–132. doi:10.2307/1267501.

    Mehrotra, Devan V. 1997. “Improving the Brown-Forsythe Solution to the
    Generalized Behrens-Fisher Problem.” Communications in Statistics -
    Simulation and Computation 26 (3): 1139–1145.
    doi:10.1080/03610919708813431.
    """
    if groups is not None:
        uniques = np.unique(groups)
        data = [data[groups == uni] for uni in uniques]
    else:
        # uniques = None  # not used yet, add to info?
        pass
    args = list(map(np.asarray, data))
    if any([x.ndim != 1 for x in args]):
        raise ValueError('data arrays have to be one-dimensional')

    nobs = np.array([len(x) for x in args], float)
    # n_groups = len(args)  # not used
    # means = np.array([np.mean(x, axis=0) for x in args], float)
    # vars_ = np.array([np.var(x, ddof=1, axis=0) for x in args], float)

    if trim_frac == 0:
        means = np.array([x.mean() for x in args])
        vars_ = np.array([x.var(ddof=1) for x in args])
    else:
        tms = [TrimmedMean(x, trim_frac) for x in args]
        means = np.array([tm.mean_trimmed for tm in tms])
        # R doesn't use uncorrected var_winsorized
        # vars_ = np.array([tm.var_winsorized for tm in tms])
        vars_ = np.array([tm.var_winsorized * (tm.nobs - 1) /
                          (tm.nobs_reduced - 1) for tm in tms])
        # nobs_original = nobs  # store just in case
        nobs = np.array([tm.nobs_reduced for tm in tms])

    res = anova_generic(means, vars_, nobs, use_var=use_var,
                        welch_correction=welch_correction)

    return res




[docs]
def equivalence_oneway_generic(f_stat, n_groups, nobs, equiv_margin, df,
                               alpha=0.05, margin_type="f2"):
    """Equivalence test for oneway anova (Wellek and extensions)

    This is an helper function when summary statistics are available.
    Use `equivalence_oneway` instead.

    The null hypothesis is that the means differ by more than `equiv_margin`
    in the anova distance measure.
    If the Null is rejected, then the data supports that means are equivalent,
    i.e. within a given distance.

    Parameters
    ----------
    f_stat : float
        F-statistic
    n_groups : int
        Number of groups in oneway comparison.
    nobs : ndarray
        Array of number of observations in groups.
    equiv_margin : float
        Equivalence margin in terms of effect size. Effect size can be chosen
        with `margin_type`. default is squared Cohen's f.
    df : tuple
        degrees of freedom ``df = (df1, df2)`` where

        - df1 : numerator degrees of freedom, number of constraints
        - df2 : denominator degrees of freedom, df_resid

    alpha : float in (0, 1)
        Significance level for the hypothesis test.
    margin_type : "f2" or "wellek"
        Type of effect size used for equivalence margin.

    Returns
    -------
    results : instance of HolderTuple class
        The two main attributes are test statistic `statistic` and p-value
        `pvalue`.

    Notes
    -----
    Equivalence in this function is defined in terms of a squared distance
    measure similar to Mahalanobis distance.
    Alternative definitions for the oneway case are based on maximum difference
    between pairs of means or similar pairwise distances.

    The equivalence margin is used for the noncentrality parameter in the
    noncentral F distribution for the test statistic. In samples with unequal
    variances estimated using Welch or Brown-Forsythe Anova, the f-statistic
    depends on the unequal variances and corrections to the test statistic.
    This means that the equivalence margins are not fully comparable across
    methods for treating unequal variances.

    References
    ----------
    Wellek, Stefan. 2010. Testing Statistical Hypotheses of Equivalence and
    Noninferiority. 2nd ed. Boca Raton: CRC Press.

    Cribbie, Robert A., Chantal A. Arpin-Cribbie, and Jamie A. Gruman. 2009.
    “Tests of Equivalence for One-Way Independent Groups Designs.” The Journal
    of Experimental Education 78 (1): 1–13.
    https://doi.org/10.1080/00220970903224552.

    Jan, Show-Li, and Gwowen Shieh. 2019. “On the Extended Welch Test for
    Assessing Equivalence of Standardized Means.” Statistics in
    Biopharmaceutical Research 0 (0): 1–8.
    https://doi.org/10.1080/19466315.2019.1654915.

    """
    nobs_t = nobs.sum()
    nobs_mean = nobs_t / n_groups

    if margin_type == "wellek":
        nc_null = nobs_mean * equiv_margin**2
        es = f_stat * (n_groups - 1) / nobs_mean
        type_effectsize = "Wellek's psi_squared"
    elif margin_type in ["f2", "fsqu", "fsquared"]:
        nc_null = nobs_t * equiv_margin
        es = f_stat / nobs_t
        type_effectsize = "Cohen's f_squared"
    else:
        raise ValueError('`margin_type` should be "f2" or "wellek"')
    crit_f = ncf_ppf(alpha, df[0], df[1], nc_null)

    if margin_type == "wellek":
        # TODO: do we need a sqrt
        crit_es = crit_f * (n_groups - 1) / nobs_mean
    elif margin_type in ["f2", "fsqu", "fsquared"]:
        crit_es = crit_f / nobs_t

    reject = (es < crit_es)

    pv = ncf_cdf(f_stat, df[0], df[1], nc_null)
    pwr = ncf_cdf(crit_f, df[0], df[1], 1e-13)  # scipy, cannot be 0
    res = HolderTuple(statistic=f_stat,
                      pvalue=pv,
                      effectsize=es,  # match es type to margin_type
                      crit_f=crit_f,
                      crit_es=crit_es,
                      reject=reject,
                      power_zero=pwr,
                      df=df,
                      f_stat=f_stat,
                      type_effectsize=type_effectsize
                      )
    return res




[docs]
def equivalence_oneway(data, equiv_margin, groups=None, use_var="unequal",
                       welch_correction=True, trim_frac=0, margin_type="f2"):
    """equivalence test for oneway anova (Wellek's Anova)

    The null hypothesis is that the means differ by more than `equiv_margin`
    in the anova distance measure.
    If the Null is rejected, then the data supports that means are equivalent,
    i.e. within a given distance.

    Parameters
    ----------
    data : tuple of array_like or DataFrame or Series
        Data for k independent samples, with k >= 2.
        The data can be provided as a tuple or list of arrays or in long
        format with outcome observations in ``data`` and group membership in
        ``groups``.
    equiv_margin : float
        Equivalence margin in terms of effect size. Effect size can be chosen
        with `margin_type`. default is squared Cohen's f.
    groups : ndarray or Series
        If data is in long format, then groups is needed as indicator to which
        group or sample and observations belongs.
    use_var : {"unequal", "equal" or "bf"}
        `use_var` specified how to treat heteroscedasticity, unequal variance,
        across samples. Three approaches are available

        "unequal" : Variances are not assumed to be equal across samples.
            Heteroscedasticity is taken into account with Welch Anova and
            Satterthwaite-Welch degrees of freedom.
            This is the default.
        "equal" : Variances are assumed to be equal across samples.
            This is the standard Anova.
        "bf: Variances are not assumed to be equal across samples.
            The method is Browne-Forsythe (1971) for testing equality of means
            with the corrected degrees of freedom by Merothra. The original BF
            degrees of freedom are available as additional attributes in the
            results instance, ``df_denom2`` and ``p_value2``.

    welch_correction : bool
        If this is false, then the Welch correction to the test statistic is
        not included. This allows the computation of an effect size measure
        that corresponds more closely to Cohen's f.
    trim_frac : float in [0, 0.5)
        Optional trimming for Anova with trimmed mean and winsorized variances.
        With the default trim_frac equal to zero, the oneway Anova statistics
        are computed without trimming. If `trim_frac` is larger than zero,
        then the largest and smallest observations in each sample are trimmed.
        The number of trimmed observations is the fraction of number of
        observations in the sample truncated to the next lower integer.
        `trim_frac` has to be smaller than 0.5, however, if the fraction is
        so large that there are not enough observations left over, then `nan`
        will be returned.
    margin_type : "f2" or "wellek"
        Type of effect size used for equivalence margin, either squared
        Cohen's f or Wellek's psi. Default is "f2".

    Returns
    -------
    results : instance of HolderTuple class
        The two main attributes are test statistic `statistic` and p-value
        `pvalue`.

    See Also
    --------
    anova_oneway
    equivalence_scale_oneway
    """

    # use anova to compute summary statistics and f-statistic
    res0 = anova_oneway(data, groups=groups, use_var=use_var,
                        welch_correction=welch_correction,
                        trim_frac=trim_frac)
    f_stat = res0.statistic
    res = equivalence_oneway_generic(f_stat, res0.n_groups, res0.nobs_t,
                                     equiv_margin, res0.df, alpha=0.05,
                                     margin_type=margin_type)

    return res




[docs]
def _power_equivalence_oneway_emp(f_stat, n_groups, nobs, eps, df, alpha=0.05):
    """Empirical power of oneway equivalence test

    This only returns post-hoc, empirical power.

    Warning: eps is currently effect size margin as defined as in Wellek, and
    not the signal to noise ratio (Cohen's f family).

    Parameters
    ----------
    f_stat : float
        F-statistic from oneway anova, used to compute empirical effect size
    n_groups : int
        Number of groups in oneway comparison.
    nobs : ndarray
        Array of number of observations in groups.
    eps : float
        Equivalence margin in terms of effect size given by Wellek's psi.
    df : tuple
        Degrees of freedom for F distribution.
    alpha : float in (0, 1)
        Significance level for the hypothesis test.

    Returns
    -------
    pow : float
        Ex-post, post-hoc or empirical power at f-statistic of the equivalence
        test.
    """

    res = equivalence_oneway_generic(f_stat, n_groups, nobs, eps, df,
                                     alpha=alpha, margin_type="wellek")

    nobs_mean = nobs.sum() / n_groups
    fn = f_stat  # post-hoc power, empirical power at estimate
    esn = fn * (n_groups - 1) / nobs_mean  # Wellek psi
    pow_ = ncf_cdf(res.crit_f, df[0], df[1], nobs_mean * esn)

    return pow_




[docs]
def power_equivalence_oneway(f2_alt, equiv_margin, nobs_t, n_groups=None,
                             df=None, alpha=0.05, margin_type="f2"):
    """
    Power of  oneway equivalence test

    Parameters
    ----------
    f2_alt : float
        Effect size, squared Cohen's f, under the alternative.
    equiv_margin : float
        Equivalence margin in terms of effect size. Effect size can be chosen
        with `margin_type`. default is squared Cohen's f.
    nobs_t : ndarray
        Total number of observations summed over all groups.
    n_groups : int
        Number of groups in oneway comparison. If margin_type is "wellek",
        then either ``n_groups`` or ``df`` has to be given.
    df : tuple
        Degrees of freedom for F distribution,
        ``df = (n_groups - 1, nobs_t - n_groups)``
    alpha : float in (0, 1)
        Significance level for the hypothesis test.
    margin_type : "f2" or "wellek"
        Type of effect size used for equivalence margin, either squared
        Cohen's f or Wellek's psi. Default is "f2".

    Returns
    -------
    pow_alt : float
        Power of the equivalence test at given equivalence effect size under
        the alternative.
    """

    # one of n_groups or df has to be specified
    if df is None:
        if n_groups is None:
            raise ValueError("either df or n_groups has to be provided")
        df = (n_groups - 1, nobs_t - n_groups)

    # esn = fn * (n_groups - 1) / nobs_mean  # Wellek psi

    # fix for scipy, ncf does not allow nc == 0, fixed in scipy master
    if f2_alt == 0:
        f2_alt = 1e-13
    # effect size, critical value at margin
    # f2_null = equiv_margin
    if margin_type in ["f2", "fsqu", "fsquared"]:
        f2_null = equiv_margin
    elif margin_type == "wellek":
        if n_groups is None:
            raise ValueError("If margin_type is wellek, then n_groups has "
                             "to be provided")
        #  f2_null = (n_groups - 1) * n_groups / nobs_t * equiv_margin**2
        nobs_mean = nobs_t / n_groups
        f2_null = nobs_mean * equiv_margin**2 / nobs_t
        f2_alt = nobs_mean * f2_alt**2 / nobs_t
    else:
        raise ValueError('`margin_type` should be "f2" or "wellek"')

    crit_f_margin = ncf_ppf(alpha, df[0], df[1], nobs_t * f2_null)
    pwr_alt = ncf_cdf(crit_f_margin, df[0], df[1], nobs_t * f2_alt)
    return pwr_alt




[docs]
def simulate_power_equivalence_oneway(means, nobs, equiv_margin, vars_=None,
                                      k_mc=1000, trim_frac=0,
                                      options_var=None, margin_type="f2"
                                      ):  # , anova_options=None):  #TODO
    """Simulate Power for oneway equivalence test (Wellek's Anova)

    This function is experimental and written to evaluate asymptotic power
    function. This function will change without backwards compatibility
    constraints. The only part that is stable is `pvalue` attribute in results.

    Effect size for equivalence margin

    """
    if options_var is None:
        options_var = ["unequal", "equal", "bf"]
    if vars_ is not None:
        stds = np.sqrt(vars_)
    else:
        stds = np.ones(len(means))

    nobs_mean = nobs.mean()
    n_groups = len(nobs)
    res_mc = []
    f_mc = []
    reject_mc = []
    other_mc = []
    for _ in range(k_mc):
        y0, y1, y2, y3 = (m + std * np.random.randn(n)
                          for (n, m, std) in zip(nobs, means, stds))

        res_i = []
        f_i = []
        reject_i = []
        other_i = []
        for uv in options_var:
            # for welch in options_welch:
            # res1 = sma.anova_generic(means, vars_, nobs, use_var=uv,
            #                          welch_correction=welch)
            res0 = anova_oneway([y0, y1, y2, y3], use_var=uv,
                                trim_frac=trim_frac)
            f_stat = res0.statistic
            res1 = equivalence_oneway_generic(f_stat, n_groups, nobs.sum(),
                                              equiv_margin, res0.df,
                                              alpha=0.05,
                                              margin_type=margin_type)
            res_i.append(res1.pvalue)
            es_wellek = f_stat * (n_groups - 1) / nobs_mean
            f_i.append(es_wellek)
            reject_i.append(res1.reject)
            other_i.extend([res1.crit_f, res1.crit_es, res1.power_zero])
        res_mc.append(res_i)
        f_mc.append(f_i)
        reject_mc.append(reject_i)
        other_mc.append(other_i)

    f_mc = np.asarray(f_mc)
    other_mc = np.asarray(other_mc)
    res_mc = np.asarray(res_mc)
    reject_mc = np.asarray(reject_mc)
    res = Holder(f_stat=f_mc,
                 other=other_mc,
                 pvalue=res_mc,
                 reject=reject_mc
                 )
    return res




[docs]
def test_scale_oneway(data, method="bf", center="median", transform="abs",
                      trim_frac_mean=0.1, trim_frac_anova=0.0):
    """Oneway Anova test for equal scale, variance or dispersion

    This hypothesis test performs a oneway anova test on transformed data and
    includes Levene and Brown-Forsythe tests for equal variances as special
    cases.

    Parameters
    ----------
    data : tuple of array_like or DataFrame or Series
        Data for k independent samples, with k >= 2. The data can be provided
        as a tuple or list of arrays or in long format with outcome
        observations in ``data`` and group membership in ``groups``.
    method : {"unequal", "equal" or "bf"}
        How to treat heteroscedasticity across samples. This is used as
        `use_var` option in `anova_oneway` and refers to the variance of the
        transformed data, i.e. assumption is on 4th moment if squares are used
        as transform.
        Three approaches are available:

        "unequal" : Variances are not assumed to be equal across samples.
            Heteroscedasticity is taken into account with Welch Anova and
            Satterthwaite-Welch degrees of freedom.
            This is the default.
        "equal" : Variances are assumed to be equal across samples.
            This is the standard Anova.
        "bf" : Variances are not assumed to be equal across samples.
            The method is Browne-Forsythe (1971) for testing equality of means
            with the corrected degrees of freedom by Merothra. The original BF
            degrees of freedom are available as additional attributes in the
            results instance, ``df_denom2`` and ``p_value2``.

    center : "median", "mean", "trimmed" or float
        Statistic used for centering observations. If a float, then this
        value is used to center. Default is median.
    transform : "abs", "square" or callable
        Transformation for the centered observations. If a callable, then this
        function is called on the centered data.
        Default is absolute value.
    trim_frac_mean=0.1 : float in [0, 0.5)
        Trim fraction for the trimmed mean when `center` is "trimmed"
    trim_frac_anova : float in [0, 0.5)
        Optional trimming for Anova with trimmed mean and Winsorized variances.
        With the default trim_frac equal to zero, the oneway Anova statistics
        are computed without trimming. If `trim_frac` is larger than zero,
        then the largest and smallest observations in each sample are trimmed.
        see ``trim_frac`` option in `anova_oneway`

    Returns
    -------
    res : results instance
        The returned HolderTuple instance has the following main attributes
        and some additional information in other attributes.

        statistic : float
            Test statistic for k-sample mean comparison which is approximately
            F-distributed.
        pvalue : float
            If ``method="bf"``, then the p-value is based on corrected
            degrees of freedom following Mehrotra 1997.
        pvalue2 : float
            This is the p-value based on degrees of freedom as in
            Brown-Forsythe 1974 and is only available if ``method="bf"``.
        df : (df_denom, df_num)
            Tuple containing degrees of freedom for the F-distribution depend
            on ``method``. If ``method="bf"``, then `df_denom` is for Mehrotra
            p-values `df_denom2` is available for Brown-Forsythe 1974 p-values.
            `df_num` is the same numerator degrees of freedom for both
            p-values.

    See Also
    --------
    anova_oneway
    scale_transform
    """

    data = map(np.asarray, data)
    xxd = [scale_transform(x, center=center, transform=transform,
                           trim_frac=trim_frac_mean) for x in data]

    res = anova_oneway(xxd, groups=None, use_var=method,
                       welch_correction=True, trim_frac=trim_frac_anova)
    res.data_transformed = xxd
    return res




[docs]
def equivalence_scale_oneway(data, equiv_margin, method='bf', center='median',
                             transform='abs', trim_frac_mean=0.,
                             trim_frac_anova=0.):
    """Oneway Anova test for equivalence of scale, variance or dispersion

    This hypothesis test performs a oneway equivalence anova test on
    transformed data.

    Note, the interpretation of the equivalence margin `equiv_margin` will
    depend on the transformation of the data. Transformations like
    absolute deviation are not scaled to correspond to the variance under
    normal distribution.

    Parameters
    ----------
    data : tuple of array_like or DataFrame or Series
        Data for k independent samples, with k >= 2. The data can be provided
        as a tuple or list of arrays or in long format with outcome
        observations in ``data`` and group membership in ``groups``.
    equiv_margin : float
        Equivalence margin in terms of effect size. Effect size can be chosen
        with `margin_type`. default is squared Cohen's f.
    method : {"unequal", "equal" or "bf"}
        How to treat heteroscedasticity across samples. This is used as
        `use_var` option in `anova_oneway` and refers to the variance of the
        transformed data, i.e. assumption is on 4th moment if squares are used
        as transform.
        Three approaches are available:

        "unequal" : Variances are not assumed to be equal across samples.
            Heteroscedasticity is taken into account with Welch Anova and
            Satterthwaite-Welch degrees of freedom.
            This is the default.
        "equal" : Variances are assumed to be equal across samples.
            This is the standard Anova.
        "bf" : Variances are not assumed to be equal across samples.
            The method is Browne-Forsythe (1971) for testing equality of means
            with the corrected degrees of freedom by Merothra. The original BF
            degrees of freedom are available as additional attributes in the
            results instance, ``df_denom2`` and ``p_value2``.
    center : "median", "mean", "trimmed" or float
        Statistic used for centering observations. If a float, then this
        value is used to center. Default is median.
    transform : "abs", "square" or callable
        Transformation for the centered observations. If a callable, then this
        function is called on the centered data.
        Default is absolute value.
    trim_frac_mean : float in [0, 0.5)
        Trim fraction for the trimmed mean when `center` is "trimmed"
    trim_frac_anova : float in [0, 0.5)
        Optional trimming for Anova with trimmed mean and Winsorized variances.
        With the default trim_frac equal to zero, the oneway Anova statistics
        are computed without trimming. If `trim_frac` is larger than zero,
        then the largest and smallest observations in each sample are trimmed.
        see ``trim_frac`` option in `anova_oneway`

    Returns
    -------
    results : instance of HolderTuple class
        The two main attributes are test statistic `statistic` and p-value
        `pvalue`.

    See Also
    --------
    anova_oneway
    scale_transform
    equivalence_oneway
    """
    data = map(np.asarray, data)
    xxd = [scale_transform(x, center=center, transform=transform,
                           trim_frac=trim_frac_mean) for x in data]

    res = equivalence_oneway(xxd, equiv_margin, use_var=method,
                             welch_correction=True, trim_frac=trim_frac_anova)
    res.x_transformed = xxd
    return res