'''
Bland-Altman mean-difference plots
Author: Joses Ho
License: BSD-3
'''
import numpy as np
from . import utils
[docs]def mean_diff_plot(m1, m2, sd_limit=1.96, ax=None, scatter_kwds=None,
mean_line_kwds=None, limit_lines_kwds=None):
"""
Tukey's Mean Difference Plot.
Tukey's Mean Difference Plot (also known as a Bland-Altman plot) is a
graphical method to analyze the differences between two methods of
measurement. The mean of the measures is plotted against their difference.
For more information see
https://en.wikipedia.org/wiki/Bland-Altman_plot
Parameters
----------
m1, m2: pandas Series or array-like
sd_limit : float, default 1.96
The limit of agreements expressed in terms of the standard deviation of
the differences. If `md` is the mean of the differences, and `sd` is
the standard deviation of those differences, then the limits of
agreement that will be plotted will be
md - sd_limit * sd, md + sd_limit * sd
The default of 1.96 will produce 95% confidence intervals for the means
of the differences.
If sd_limit = 0, no limits will be plotted, and the ylimit of the plot
defaults to 3 standard deviatons on either side of the mean.
ax: matplotlib AxesSubplot instance, optional
If `ax` is None, then a figure is created. If an axis instance is
given, the mean difference plot is drawn on the axis.
scatter_kwargs: keywords
Options to to style the scatter plot. Accepts any keywords for the
matplotlib Axes.scatter plotting method
mean_line_kwds: keywords
Options to to style the scatter plot. Accepts any keywords for the
matplotlib Axes.axhline plotting method
limit_lines_kwds: keywords
Options to to style the scatter plot. Accepts any keywords for the
matplotlib Axes.axhline plotting method
Returns
-------
fig : matplotlib Figure
If `ax` is None, the created figure. Otherwise the figure to which
`ax` is connected.
References
----------
Bland JM, Altman DG (1986). "Statistical methods for assessing agreement
between two methods of clinical measurement"
Example
--------
Load relevant libraries.
>>> import statsmodels.api as sm
>>> import numpy as np
>>> import matplotlib.pyplot as plt
Making a mean difference plot.
>>> # Seed the random number generator.
>>> # This ensures that the results below are reproducible.
>>> np.random.seed(9999)
>>> m1 = np.random.random(20)
>>> m2 = np.random.random(20)
>>> f, ax = plt.subplots(1, figsize = (8,5))
>>> sm.graphics.mean_diff_plot(m1, m2, ax = ax)
>>> plt.show()
.. plot:: plots/graphics-mean_diff_plot.py
"""
fig, ax = utils.create_mpl_ax(ax)
if len(m1) != len(m2):
raise ValueError('m1 does not have the same length as m2.')
if sd_limit < 0:
raise ValueError('sd_limit ({}) is less than 0.'.format(sd_limit))
means = np.mean([m1, m2], axis=0)
diffs = m1 - m2
mean_diff = np.mean(diffs)
std_diff = np.std(diffs, axis=0)
scatter_kwds = scatter_kwds or {}
if 's' not in scatter_kwds:
scatter_kwds['s'] = 20
mean_line_kwds = mean_line_kwds or {}
limit_lines_kwds = limit_lines_kwds or {}
for kwds in [mean_line_kwds, limit_lines_kwds]:
if 'color' not in kwds:
kwds['color'] = 'gray'
if 'linewidth' not in kwds:
kwds['linewidth'] = 1
if 'linestyle' not in mean_line_kwds:
kwds['linestyle'] = '--'
if 'linestyle' not in limit_lines_kwds:
kwds['linestyle'] = ':'
ax.scatter(means, diffs, **scatter_kwds) # Plot the means against the diffs.
ax.axhline(mean_diff, **mean_line_kwds) # draw mean line.
# Annotate mean line with mean difference.
ax.annotate('mean diff:\n{}'.format(np.round(mean_diff, 2)),
xy=(0.99, 0.5),
horizontalalignment='right',
verticalalignment='center',
fontsize=14,
xycoords='axes fraction')
if sd_limit > 0:
half_ylim = (1.5 * sd_limit) * std_diff
ax.set_ylim(mean_diff - half_ylim,
mean_diff + half_ylim)
limit_of_agreement = sd_limit * std_diff
lower = mean_diff - limit_of_agreement
upper = mean_diff + limit_of_agreement
for j, lim in enumerate([lower, upper]):
ax.axhline(lim, **limit_lines_kwds)
ax.annotate('-SD{}: {}'.format(sd_limit, np.round(lower, 2)),
xy=(0.99, 0.07),
horizontalalignment='right',
verticalalignment='bottom',
fontsize=14,
xycoords='axes fraction')
ax.annotate('+SD{}: {}'.format(sd_limit, np.round(upper, 2)),
xy=(0.99, 0.92),
horizontalalignment='right',
fontsize=14,
xycoords='axes fraction')
elif sd_limit == 0:
half_ylim = 3 * std_diff
ax.set_ylim(mean_diff - half_ylim,
mean_diff + half_ylim)
ax.set_ylabel('Difference', fontsize=15)
ax.set_xlabel('Means', fontsize=15)
ax.tick_params(labelsize=13)
fig.tight_layout()
return fig