Source code for statsmodels.tsa.stl.mstl

"""
Author: Kishan Manani
License: BSD-3 Clause

An implementation of MSTL [1], an algorithm for time series decomposition when
there are multiple seasonal components.

This implementation has the following differences with the original algorithm:
- Missing data must be handled outside of this class.
- The algorithm proposed in the paper handles a case when there is no
seasonality. This implementation assumes that there is at least one seasonal
component.

[1] K. Bandura, R.J. Hyndman, and C. Bergmeir (2021)
MSTL: A Seasonal-Trend Decomposition Algorithm for Time Series with Multiple
Seasonal Patterns
https://arxiv.org/pdf/2107.13462.pdf
"""
from collections.abc import Sequence
from typing import Optional, Union
import warnings

import numpy as np
import pandas as pd
from scipy.stats import boxcox

from statsmodels.tools.typing import ArrayLike1D
from statsmodels.tsa.seasonal._seasonal import DecomposeResult
from statsmodels.tsa.stl._stl import STL
from statsmodels.tsa.tsatools import freq_to_period


[docs] class MSTL: """ MSTL(endog, periods=None, windows=None, lmbda=None, iterate=2, stl_kwargs=None) Season-Trend decomposition using LOESS for multiple seasonalities. .. versionadded:: 0.14.0 Parameters ---------- endog : array_like Data to be decomposed. Must be squeezable to 1-d. periods : {int, array_like, None}, optional Periodicity of the seasonal components. If None and endog is a pandas Series or DataFrame, attempts to determine from endog. If endog is a ndarray, periods must be provided. windows : {int, array_like, None}, optional Length of the seasonal smoothers for each corresponding period. Must be an odd integer, and should normally be >= 7 (default). If None then default values determined using 7 + 4 * np.arange(1, n + 1, 1) where n is number of seasonal components. lmbda : {float, str, None}, optional The lambda parameter for the Box-Cox transform to be applied to `endog` prior to decomposition. If None, no transform is applied. If "auto", a value will be estimated that maximizes the log-likelihood function. iterate : int, optional Number of iterations to use to refine the seasonal component. stl_kwargs: dict, optional Arguments to pass to STL. See Also -------- statsmodels.tsa.seasonal.STL References ---------- .. [1] K. Bandura, R.J. Hyndman, and C. Bergmeir (2021) MSTL: A Seasonal-Trend Decomposition Algorithm for Time Series with Multiple Seasonal Patterns. arXiv preprint arXiv:2107.13462. Examples -------- Start by creating a toy dataset with hourly frequency and multiple seasonal components. >>> import numpy as np >>> import matplotlib.pyplot as plt >>> import pandas as pd >>> pd.plotting.register_matplotlib_converters() >>> np.random.seed(0) >>> t = np.arange(1, 1000) >>> trend = 0.0001 * t ** 2 + 100 >>> daily_seasonality = 5 * np.sin(2 * np.pi * t / 24) >>> weekly_seasonality = 10 * np.sin(2 * np.pi * t / (24 * 7)) >>> noise = np.random.randn(len(t)) >>> y = trend + daily_seasonality + weekly_seasonality + noise >>> index = pd.date_range(start='2000-01-01', periods=len(t), freq='h') >>> data = pd.DataFrame(data=y, index=index) Use MSTL to decompose the time series into two seasonal components with periods 24 (daily seasonality) and 24*7 (weekly seasonality). >>> from statsmodels.tsa.seasonal import MSTL >>> res = MSTL(data, periods=(24, 24*7)).fit() >>> res.plot() >>> plt.tight_layout() >>> plt.show() .. plot:: plots/mstl_plot.py """ def __init__( self, endog: ArrayLike1D, *, periods: Optional[Union[int, Sequence[int]]] = None, windows: Optional[Union[int, Sequence[int]]] = None, lmbda: Optional[Union[float, str]] = None, iterate: int = 2, stl_kwargs: Optional[dict[str, Union[int, bool, None]]] = None, ): self.endog = endog self._y = self._to_1d_array(endog) self.nobs = self._y.shape[0] self.lmbda = lmbda self.periods, self.windows = self._process_periods_and_windows( periods, windows ) self.iterate = iterate self._stl_kwargs = self._remove_overloaded_stl_kwargs( stl_kwargs if stl_kwargs else {} )
[docs] def fit(self): """ Estimate a trend component, multiple seasonal components, and a residual component. Returns ------- DecomposeResult Estimation results. """ num_seasons = len(self.periods) iterate = 1 if num_seasons == 1 else self.iterate # Box Cox if self.lmbda == "auto": y, lmbda = boxcox(self._y, lmbda=None) self.est_lmbda = lmbda elif self.lmbda: y = boxcox(self._y, lmbda=self.lmbda) else: y = self._y # Get STL fit params stl_inner_iter = self._stl_kwargs.pop("inner_iter", None) stl_outer_iter = self._stl_kwargs.pop("outer_iter", None) # Iterate over each seasonal component to extract seasonalities seasonal = np.zeros(shape=(num_seasons, self.nobs)) deseas = y for _ in range(iterate): for i in range(num_seasons): deseas = deseas + seasonal[i] res = STL( endog=deseas, period=self.periods[i], seasonal=self.windows[i], **self._stl_kwargs, ).fit(inner_iter=stl_inner_iter, outer_iter=stl_outer_iter) seasonal[i] = res.seasonal deseas = deseas - seasonal[i] seasonal = np.squeeze(seasonal.T) trend = res.trend rw = res.weights resid = deseas - trend # Return pandas if endog is pandas if isinstance(self.endog, (pd.Series, pd.DataFrame)): index = self.endog.index y = pd.Series(y, index=index, name="observed") trend = pd.Series(trend, index=index, name="trend") resid = pd.Series(resid, index=index, name="resid") rw = pd.Series(rw, index=index, name="robust_weight") cols = [f"seasonal_{period}" for period in self.periods] if seasonal.ndim == 1: seasonal = pd.Series(seasonal, index=index, name="seasonal") else: seasonal = pd.DataFrame(seasonal, index=index, columns=cols) return DecomposeResult(y, seasonal, trend, resid, rw)
def __str__(self): return ( "MSTL(endog," f" periods={self.periods}," f" windows={self.windows}," f" lmbda={self.lmbda}," f" iterate={self.iterate})" ) def _process_periods_and_windows( self, periods: Union[int, Sequence[int], None], windows: Union[int, Sequence[int], None], ) -> tuple[Sequence[int], Sequence[int]]: periods = self._process_periods(periods) if windows: windows = self._process_windows(windows, num_seasons=len(periods)) periods, windows = self._sort_periods_and_windows(periods, windows) else: windows = self._process_windows(windows, num_seasons=len(periods)) periods = sorted(periods) if len(periods) != len(windows): raise ValueError("Periods and windows must have same length") # Remove long periods from decomposition if any(period >= self.nobs / 2 for period in periods): warnings.warn( "A period(s) is larger than half the length of time series." " Removing these period(s).", UserWarning ) periods = tuple( period for period in periods if period < self.nobs / 2 ) windows = windows[: len(periods)] return periods, windows def _process_periods( self, periods: Union[int, Sequence[int], None] ) -> Sequence[int]: if periods is None: periods = (self._infer_period(),) elif isinstance(periods, int): periods = (periods,) else: pass return periods def _process_windows( self, windows: Union[int, Sequence[int], None], num_seasons: int, ) -> Sequence[int]: if windows is None: windows = self._default_seasonal_windows(num_seasons) elif isinstance(windows, int): windows = (windows,) else: pass return windows def _infer_period(self) -> int: freq = None if isinstance(self.endog, (pd.Series, pd.DataFrame)): freq = getattr(self.endog.index, "inferred_freq", None) if freq is None: raise ValueError("Unable to determine period from endog") period = freq_to_period(freq) return period @staticmethod def _sort_periods_and_windows( periods, windows ) -> tuple[Sequence[int], Sequence[int]]: if len(periods) != len(windows): raise ValueError("Periods and windows must have same length") periods, windows = zip(*sorted(zip(periods, windows))) return periods, windows @staticmethod def _remove_overloaded_stl_kwargs(stl_kwargs: dict) -> dict: args = ["endog", "period", "seasonal"] for arg in args: stl_kwargs.pop(arg, None) return stl_kwargs @staticmethod def _default_seasonal_windows(n: int) -> Sequence[int]: return tuple(7 + 4 * i for i in range(1, n + 1)) # See [1] @staticmethod def _to_1d_array(x): y = np.ascontiguousarray(np.squeeze(np.asarray(x)), dtype=np.double) if y.ndim != 1: raise ValueError("y must be a 1d array") return y

Last update: Jan 20, 2025