Source code for statsmodels.tsa.stattools._arma_order_selection
from __future__ import annotations
from typing import Union
import numpy as np
from numpy.linalg import LinAlgError
import pandas as pd
from statsmodels.tools.tools import Bunch
from statsmodels.tools.validation import (
array_like,
dict_like,
int_like,
string_like,
)
ArrayLike1D = Union[np.ndarray, pd.Series, list[float]]
def _safe_arma_fit(y, order, model_kw, trend, fit_kw, start_params=None):
from statsmodels.tsa.arima.model import ARIMA
try:
return ARIMA(y, order=order, **model_kw, trend=trend).fit(
start_params=start_params, **fit_kw
)
except LinAlgError:
# SVD convergence failure on badly misspecified models
return
except ValueError as error:
if start_params is not None: # do not recurse again
# user supplied start_params only get one chance
return
# try a little harder, should be handled in fit really
elif "initial" not in error.args[0] or "initial" in str(error):
start_params = [0.1] * sum(order)
if trend == "c":
start_params = [0.1] + start_params
return _safe_arma_fit(y, order, model_kw, trend, fit_kw, start_params)
else:
return
except Exception: # no idea what happened
return
[docs]
def arma_order_select_ic(
y, max_ar=4, max_ma=2, ic="bic", trend="c", model_kw=None, fit_kw=None
):
"""
Compute information criteria for many ARMA models.
Parameters
----------
y : array_like
Array of time-series data.
max_ar : int
Maximum number of AR lags to use. Default 4.
max_ma : int
Maximum number of MA lags to use. Default 2.
ic : str, list
Information criteria to report. Either a single string or a list
of different criteria is possible.
trend : str
The trend to use when fitting the ARMA models.
model_kw : dict
Keyword arguments to be passed to the ``ARMA`` model.
fit_kw : dict
Keyword arguments to be passed to ``ARMA.fit``.
Returns
-------
Bunch
Dict-like object with attribute access. Each ic is an attribute with a
DataFrame for the results. The AR order used is the row index. The ma
order used is the column index. The minimum orders are available as
``ic_min_order``.
Notes
-----
This method can be used to tentatively identify the order of an ARMA
process, provided that the time series is stationary and invertible. This
function computes the full exact MLE estimate of each model and can be,
therefore a little slow. An implementation using approximate estimates
will be provided in the future. In the meantime, consider passing
{method : "css"} to fit_kw.
Examples
--------
>>> from statsmodels.tsa.arima_process import arma_generate_sample
>>> import statsmodels.api as sm
>>> import numpy as np
>>> arparams = np.array([.75, -.25])
>>> maparams = np.array([.65, .35])
>>> arparams = np.r_[1, -arparams]
>>> maparam = np.r_[1, maparams]
>>> nobs = 250
>>> np.random.seed(2014)
>>> y = arma_generate_sample(arparams, maparams, nobs)
>>> res = sm.tsa.arma_order_select_ic(y, ic=["aic", "bic"], trend="n")
>>> res.aic_min_order
>>> res.bic_min_order
"""
max_ar = int_like(max_ar, "max_ar")
max_ma = int_like(max_ma, "max_ma")
trend = string_like(trend, "trend", options=("n", "c"))
model_kw = dict_like(model_kw, "model_kw", optional=True)
fit_kw = dict_like(fit_kw, "fit_kw", optional=True)
ar_range = [i for i in range(max_ar + 1)]
ma_range = [i for i in range(max_ma + 1)]
if isinstance(ic, str):
ic = [ic]
elif not isinstance(ic, (list, tuple)):
raise ValueError("Need a list or a tuple for ic if not a string.")
results = np.zeros((len(ic), max_ar + 1, max_ma + 1))
model_kw = {} if model_kw is None else model_kw
fit_kw = {} if fit_kw is None else fit_kw
y_arr = array_like(y, "y", contiguous=True)
for ar in ar_range:
for ma in ma_range:
mod = _safe_arma_fit(y_arr, (ar, 0, ma), model_kw, trend, fit_kw)
if mod is None:
results[:, ar, ma] = np.nan
continue
for i, criteria in enumerate(ic):
results[i, ar, ma] = getattr(mod, criteria)
dfs = [pd.DataFrame(res, columns=ma_range, index=ar_range) for res in results]
res = dict(zip(ic, dfs))
# add the minimums to the results dict
min_res = {}
for i, result in res.items():
delta = np.ascontiguousarray(np.abs(result.min().min() - result))
ncols = delta.shape[1]
loc = np.argmin(delta)
min_res.update({i + "_min_order": (loc // ncols, loc % ncols)})
res.update(min_res)
return Bunch(**res)
Last update:
Jan 27, 2025