# -*- coding: utf-8 -*-
"""
News for state space models
Author: Chad Fulton
License: BSD-3
"""
import numpy as np
import pandas as pd
from statsmodels.iolib.table import SimpleTable
from statsmodels.iolib.summary import Summary
from statsmodels.iolib.tableformatting import fmt_params
class NewsResults(object):
"""
Impacts of data revisions and news on estimates of variables of interest
Parameters
----------
news_results : SimpleNamespace instance
Results from `KalmanSmoother.news`.
model : MLEResults
The results object associated with the model from which the NewsResults
was generated.
updated : MLEResults
The results object associated with the model containing the updated
dataset.
previous : MLEResults
The results object associated with the model containing the previous
dataset.
impacted_variable : str, list, array, or slice, optional
Observation variable label or slice of labels specifying particular
impacted variables to display in output. The impacted variable(s)
describe the variables that were *affected* by the news. If you do not
know the labels for the variables, check the `endog_names` attribute of
the model instance.
tolerance : float, optional
The numerical threshold for determining zero impact. Default is that
any impact less than 1e-10 is assumed to be zero.
row_labels : iterable
Row labels (often dates) for the impacts of the revisions and news.
Attributes
----------
total_impacts : pd.DataFrame
Updates to forecasts of impacted variables from both news and data
revisions, E[y^i | post] - E[y^i | previous].
update_impacts : pd.DataFrame
Updates to forecasts of impacted variables from the news,
E[y^i | post] - E[y^i | revisions] where y^i are the impacted variables
of interest.
revision_impacts : pd.DataFrame
Updates to forecasts of impacted variables from data revisions,
E[y^i | revisions] - E[y^i | previous].
news : pd.Series
The unexpected component of the updated data,
E[y^u | post] - E[y^u | revisions] where y^u are the updated variables.
weights : pd.DataFrame
Weights describing the effect of news on variables of interest.
update_forecasts : pd.Series
Forecasts based on the previous dataset of the variables that were
updated, E[y^u | previous].
update_realized : pd.Series
Actual observed data associated with the variables that were
updated, y^u
prev_impacted_forecasts : pd.DataFrame
Previous forecast of the variables of interest, E[y^i | previous].
post_impacted_forecasts : pd.DataFrame
Forecast of the variables of interest after taking into account both
revisions and updates, E[y^i | post].
revisions_iloc : pd.DataFrame
The integer locations of the data revisions in the dataset.
revisions_ix : pd.DataFrame
The label-based locations of the data revisions in the dataset.
updates_iloc : pd.DataFrame
The integer locations of the updated data points.
updates_ix : pd.DataFrame
The label-based locations of updated data points.
References
----------
.. [1] Bańbura, Marta, and Michele Modugno.
"Maximum likelihood estimation of factor models on datasets with
arbitrary pattern of missing data."
Journal of Applied Econometrics 29, no. 1 (2014): 133-160.
.. [2] Bańbura, Marta, Domenico Giannone, and Lucrezia Reichlin.
"Nowcasting."
The Oxford Handbook of Economic Forecasting. July 8, 2011.
.. [3] Bańbura, Marta, Domenico Giannone, Michele Modugno, and Lucrezia
Reichlin.
"Now-casting and the real-time data flow."
In Handbook of economic forecasting, vol. 2, pp. 195-237.
Elsevier, 2013.
"""
def __init__(self, news_results, model, updated, previous,
impacted_variable=None, tolerance=1e-10, row_labels=None):
# Note: `model` will be the same as one of `revised` or `previous`, but
# we need to save it as self.model so that the `predict_dates`, which
# were generated by the `_get_prediction_index` call, will be available
# for use by the base wrapping code.
self.model = model
self.updated = updated
self.previous = previous
self.news_results = news_results
self._impacted_variable = impacted_variable
self._tolerance = tolerance
self.row_labels = row_labels
self.params = [] # required for `summary` to work
columns = np.atleast_1d(self.updated.model.endog_names)
# E[y^i | post]
self.post_impacted_forecasts = pd.DataFrame(
news_results.post_impacted_forecasts.T,
index=self.row_labels, columns=columns)
# E[y^i | previous]
self.prev_impacted_forecasts = pd.DataFrame(
news_results.prev_impacted_forecasts.T,
index=self.row_labels, columns=columns)
# E[y^i | post] - E[y^i | revisions]
self.update_impacts = pd.DataFrame(
news_results.update_impacts,
index=self.row_labels, columns=columns)
# E[y^i | revisions] - E[y^i | previous]
self.revision_impacts = pd.DataFrame(
news_results.revision_impacts,
index=self.row_labels, columns=columns)
# E[y^i | post] - E[y^i | previous]
self.total_impacts = (self.post_impacted_forecasts -
self.prev_impacted_forecasts)
# Indices of revisions and updates
index = self.updated.model._index
self.revisions_iloc = pd.DataFrame(
list(zip(*news_results.revisions_ix)),
index=['revision date', 'revised variable']).T
iloc = self.revisions_iloc
if len(iloc) > 0:
self.revisions_ix = pd.DataFrame({
'revision date': index[iloc['revision date']],
'revised variable': columns[iloc['revised variable']]})
else:
self.revisions_ix = iloc.copy()
self.updates_iloc = pd.DataFrame(
list(zip(*news_results.updates_ix)),
index=['update date', 'updated variable']).T
iloc = self.updates_iloc
if len(iloc) > 0:
self.updates_ix = pd.DataFrame({
'update date': index[iloc['update date']],
'updated variable': columns[iloc['updated variable']]})
else:
self.updates_ix = iloc.copy()
# Wrap forecasts and forecasts errors
ix = pd.MultiIndex.from_arrays([self.updates_ix['update date'],
self.updates_ix['updated variable']])
# E[y^u | post] - E[y^u | previous]
if news_results.news is None:
self.news = pd.Series([], index=ix, name='news',
dtype=model.params.dtype)
else:
self.news = pd.Series(news_results.news, index=ix, name='news')
# E[y^u | previous]
if news_results.update_forecasts is None:
self.update_forecasts = pd.Series([], index=ix,
dtype=model.params.dtype)
else:
self.update_forecasts = pd.Series(
news_results.update_forecasts, index=ix)
# y^u
if news_results.update_realized is None:
self.update_realized = pd.Series([], index=ix,
dtype=model.params.dtype)
else:
self.update_realized = pd.Series(
news_results.update_realized, index=ix)
cols = pd.MultiIndex.from_product([self.row_labels, columns])
# reshaped version of gain matrix E[y A'] E[A A']^{-1}
if len(self.updates_iloc):
weights = news_results.gain.transpose(0, 1, 2).reshape(
len(cols), len(ix))
else:
weights = np.zeros((len(cols), len(ix)))
self.weights = pd.DataFrame(weights, index=cols, columns=ix).T
self.weights.columns.names = ['impact date', 'impacted variable']
@property
def impacted_variable(self):
    """
    Default impacted variable label(s) used to subset the output tables

    When not None, `details_by_impact`, `details_by_update` and `impacts`
    restrict their output to this selection.
    """
    return self._impacted_variable

@impacted_variable.setter
def impacted_variable(self, value):
    # Stored as-is; no validation is performed here.
    self._impacted_variable = value
@property
def tolerance(self):
    """
    Numerical threshold for determining zero impact

    Rows whose absolute weight / impact does not exceed this value are
    dropped from the details and impacts tables.
    """
    return self._tolerance

@tolerance.setter
def tolerance(self, value):
    # Stored as-is; no validation is performed here.
    self._tolerance = value
@property
def data_revisions(self):
"""
Revisions to data points that existed in the previous dataset
Returns
-------
data_revisions : pd.DataFrame
Index is as MultiIndex consisting of `revision date` and
`revised variable`. The columns are:
- `observed (prev)`: the value of the data as it was observed
in the previous dataset.
- `revised`: the revised value of the data, as it is observed
in the new dataset
See also
--------
data_updates
"""
# Save revisions data
data = self.revisions_ix.copy()
data['observed (prev)'] = [
self.previous.model.endog[row[0], row[1]]
for _, row in self.revisions_iloc.iterrows()]
data['revised'] = [
self.updated.model.endog[row[0], row[1]]
for _, row in self.revisions_iloc.iterrows()]
data.index = pd.MultiIndex.from_arrays([data['revision date'],
data['revised variable']])
data = data.sort_index().drop(['revision date',
'revised variable'], axis=1)
return data
@property
def data_updates(self):
"""
Updated data; new entries that did not exist in the previous dataset
Returns
-------
data_updates : pd.DataFrame
Index is as MultiIndex consisting of `update date` and
`updated variable`. The columns are:
- `forecast (prev)`: the previous forecast of the new entry,
based on the information available in the previous dataset
(recall that for these updated data points, the previous dataset
had no observed value for them at all)
- `observed`: the value of the new entry, as it is observed in the
new dataset
See also
--------
data_updates
"""
data = pd.concat([self.update_realized, self.update_forecasts],
axis=1).sort_index().reset_index()
data.columns = (data.columns[:2].tolist() +
['observed', 'forecast (prev)'])
data.index = pd.MultiIndex.from_arrays([data['update date'],
data['updated variable']])
data = data.sort_index().drop(['update date',
'updated variable'], axis=1)
return data
@property
def details_by_impact(self):
"""
Details of forecast revisions from news, organized by impacts first
Returns
-------
details : pd.DataFrame
Index is as MultiIndex consisting of:
- `impact date`: the date of the impact on the variable of interest
- `impacted variable`: the variable that is being impacted
- `update date`: the date of the data update, that results in
`news` that impacts the forecast of variables of interest
- `updated variable`: the variable being updated, that results in
`news` that impacts the forecast of variables of interest
The columns are:
- `forecast (prev)`: the previous forecast of the new entry,
based on the information available in the previous dataset
- `observed`: the value of the new entry, as it is observed in the
new dataset
- `news`: the news associated with the update (this is just the
forecast error: `observed` - `forecast (prev)`)
- `weight`: the weight describing how the `news` effects the
forecast of the variable of interest
- `impact`: the impact of the `news` on the forecast of the
variable of interest
Notes
-----
This table decomposes updated forecasts of variables of interest from
the `news` associated with each updated datapoint from the new data
release.
This table does not summarize the impacts or show the effect of
revisions. That information can be found in the `impacts` table.
This form of the details table is organized so that the impacted
dates / variables are first in the index. This is convenient for
slicing by impacted variables / dates to view the details of data
updates for a particular variable or date.
However, since the `forecast (prev)` and `observed` columns have a lot
of duplication, printing the entire table gives a result that is less
easy to parse than that produced by the `details_by_update` property.
`details_by_update` contains the same information but is organized to
be more convenient for displaying the entire table of detailed updates.
At the same time, `details_by_update` is less convenient for
subsetting.
See Also
--------
details_by_update
impacts
"""
df = self.weights.stack(level=[0, 1]).rename('weight').to_frame()
if len(self.updates_iloc):
df['forecast (prev)'] = self.update_forecasts
df['observed'] = self.update_realized
df['news'] = self.news
df['impact'] = df['news'] * df['weight']
else:
df['forecast (prev)'] = []
df['observed'] = []
df['news'] = []
df['impact'] = []
df = df[['observed', 'forecast (prev)', 'news', 'weight', 'impact']]
df = df.reorder_levels([2, 3, 0, 1]).sort_index()
if self.impacted_variable is not None and len(df) > 0:
df = df.loc[np.s_[:, self.impacted_variable], :]
mask = np.abs(df['weight']) > self.tolerance
return df[mask]
@property
def details_by_update(self):
"""
Details of forecast revisions from news, organized by updates first
Returns
-------
details : pd.DataFrame
Index is as MultiIndex consisting of:
- `update date`: the date of the data update, that results in
`news` that impacts the forecast of variables of interest
- `updated variable`: the variable being updated, that results in
`news` that impacts the forecast of variables of interest
- `forecast (prev)`: the previous forecast of the new entry,
based on the information available in the previous dataset
- `observed`: the value of the new entry, as it is observed in the
new dataset
- `impact date`: the date of the impact on the variable of interest
- `impacted variable`: the variable that is being impacted
The columns are:
- `news`: the news associated with the update (this is just the
forecast error: `observed` - `forecast (prev)`)
- `weight`: the weight describing how the `news` effects the
forecast of the variable of interest
- `impact`: the impact of the `news` on the forecast of the
variable of interest
Notes
-----
This table decomposes updated forecasts of variables of interest from
the `news` associated with each updated datapoint from the new data
release.
This table does not summarize the impacts or show the effect of
revisions. That information can be found in the `impacts` table.
This form of the details table is organized so that the updated
dates / variables are first in the index, and in this table the index
also contains the forecasts and observed values of the updates. This is
convenient for displaying the entire table of detailed updates because
it allows sparsifying duplicate entries.
However, since it includes forecasts and observed values in the index
of the table, it is not convenient for subsetting by the variable of
interest. Instead, the `details_by_impact` property is organized to
make slicing by impacted variables / dates easy. This allows, for
example, viewing the details of data updates on a particular variable
or date of interest.
See Also
--------
details_by_impact
impacts
"""
df = self.weights.stack(level=[0, 1]).rename('weight').to_frame()
if len(self.updates_iloc):
df['forecast (prev)'] = self.update_forecasts
df['observed'] = self.update_realized
df['news'] = self.news
df['impact'] = df['news'] * df['weight']
else:
df['forecast (prev)'] = []
df['observed'] = []
df['news'] = []
df['impact'] = []
df = df[['forecast (prev)', 'observed', 'news',
'weight', 'impact']]
df = df.reset_index()
keys = ['update date', 'updated variable', 'observed',
'forecast (prev)', 'impact date', 'impacted variable']
df.index = pd.MultiIndex.from_arrays([df[key] for key in keys])
details = df.drop(keys, axis=1).sort_index()
if self.impacted_variable is not None and len(df) > 0:
details = details.loc[
np.s_[:, :, :, :, :, self.impacted_variable], :]
mask = np.abs(details['weight']) > self.tolerance
return details[mask]
@property
def impacts(self):
"""
Impacts from news and revisions on all dates / variables of interest
Returns
-------
impacts : pd.DataFrame
Index is as MultiIndex consisting of:
- `impact date`: the date of the impact on the variable of interest
- `impacted variable`: the variable that is being impacted
The columns are:
- `estimate (prev)`: the previous estimate / forecast of the
date / variable of interest.
- `impact of revisions`: the impact of all data revisions on
the estimate of the date / variable of interest.
- `impact of news`: the impact of all news on the estimate of
the date / variable of interest.
- `total impact`: the total impact of both revisions and news on
the estimate of the date / variable of interest.
- `estimate (new)`: the new estimate / forecast of the
date / variable of interest after taking into account the effects
of the revisions and news.
Notes
-----
This table decomposes updated forecasts of variables of interest into
the overall effect from revisions and news.
This table does not break down the detail by the updated
dates / variables. That information can be found in the
`details_by_impact` `details_by_update` tables.
See Also
--------
details_by_impact
details_by_update
"""
# Summary of impacts
impacts = pd.concat([
self.prev_impacted_forecasts.unstack().rename('estimate (prev)'),
self.revision_impacts.unstack().rename('impact of revisions'),
self.update_impacts.unstack().rename('impact of news'),
self.post_impacted_forecasts.unstack().rename('estimate (new)')],
axis=1)
impacts['impact of revisions'] = (
impacts['impact of revisions'].fillna(0))
impacts['impact of news'] = (
impacts['impact of news'].fillna(0))
impacts['total impact'] = (impacts['impact of revisions'] +
impacts['impact of news'])
impacts = impacts.reorder_levels([1, 0]).sort_index()
impacts.index.names = ['impact date', 'impacted variable']
impacts = impacts[['estimate (prev)', 'impact of revisions',
'impact of news', 'total impact', 'estimate (new)']]
if self.impacted_variable is not None:
impacts = impacts.loc[np.s_[:, self.impacted_variable], :]
tmp = np.abs(impacts[['impact of revisions', 'impact of news']])
mask = (tmp > self.tolerance).any(axis=1)
return impacts[mask]
def summary_impacts(self, impact_date=None, impacted_variable=None,
groupby='impact date', show_revisions_columns=None,
sparsify=True, float_format='%.2f'):
"""
Create summary table with detailed impacts from news; by date, variable
Parameters
----------
impact_date : int, str, datetime, list, array, or slice, optional
Observation index label or slice of labels specifying particular
impact periods to display. The impact date(s) describe the periods
in which impacted variables were *affected* by the news. If this
argument is given, the output table will only show this impact date
or dates. Note that this argument is passed to the Pandas `loc`
accessor, and so it should correspond to the labels of the model's
index. If the model was created with data in a list or numpy array,
then these labels will be zero-indexes observation integers.
impacted_variable : str, list, array, or slice, optional
Observation variable label or slice of labels specifying particular
impacted variables to display. The impacted variable(s) describe
the variables that were *affected* by the news. If you do not know
the labels for the variables, check the `endog_names` attribute of
the model instance.
groupby : {impact date, impacted date}
The primary variable for grouping results in the impacts table. The
default is to group by update date.
show_revisions_columns : bool, optional
If set to False, the impacts table will not show the impacts from
data revisions or the total impacts. Default is to show the
revisions and totals columns if any revisions were made and
otherwise to hide them.
sparsify : bool, optional, default True
Set to False for the table to include every one of the multiindex
keys at each row.
float_format : str, optional
Formatter format string syntax for converting numbers to strings.
Default is '%.2f'.
Returns
-------
impacts_table : SimpleTable
Table describing total impacts from both revisions and news. See
the documentation for the `impacts` attribute for more details
about the index and columns.
See Also
--------
impacts
"""
# Squeeze for univariate models
if impacted_variable is None and self.updated.model.k_endog == 1:
impacted_variable = self.updated.model.endog_names
# Default is to only show the revisions columns if there were any
# revisions (otherwise it would just be a column of zeros)
if show_revisions_columns is None:
show_revisions_columns = len(self.revisions_iloc) > 0
# Select only the variables / dates of interest
s = list(np.s_[:, :])
if impact_date is not None:
s[0] = np.s_[impact_date]
if impacted_variable is not None:
s[1] = np.s_[impacted_variable]
s = tuple(s)
impacts = self.impacts.loc[s, :]
# Make the first index level the groupby level
groupby = groupby.lower()
if groupby in ['impacted variable', 'impacted_variable']:
impacts.index = impacts.index.swaplevel(1, 0)
elif groupby not in ['impact date', 'impact_date']:
raise ValueError('Invalid groupby for impacts table. Valid options'
' are "impact date" or "impacted variable".'
f'Got "{groupby}".')
impacts = impacts.sort_index()
# Drop the non-groupby level if there's only one value
tmp_index = impacts.index.remove_unused_levels()
k_vars = len(tmp_index.levels[1])
removed_level = None
if sparsify and k_vars == 1:
name = tmp_index.names[1]
value = tmp_index.levels[1][0]
removed_level = f'{name} = {value}'
impacts.index = tmp_index.droplevel(1)
impacts = impacts.applymap(
lambda num: '' if pd.isnull(num) else float_format % num)
impacts = impacts.reset_index()
impacts.iloc[:, 0] = impacts.iloc[:, 0].map(str)
else:
impacts = impacts.reset_index()
impacts.iloc[:, :2] = impacts.iloc[:, :2].applymap(str)
impacts.iloc[:, 2:] = impacts.iloc[:, 2:].applymap(
lambda num: '' if pd.isnull(num) else float_format % num)
# Sparsify the groupby column
if sparsify and groupby in impacts:
mask = impacts[groupby] == impacts[groupby].shift(1)
tmp = impacts.loc[mask, groupby]
if len(tmp) > 0:
impacts.loc[mask, groupby] = ''
# Drop revisions and totals columns if applicable
if not show_revisions_columns:
impacts.drop(['impact of revisions', 'total impact'], axis=1,
inplace=True)
params_data = impacts.values
params_header = impacts.columns.tolist()
params_stubs = None
title = 'Impacts'
if removed_level is not None:
join = 'on' if groupby == 'date' else 'for'
title += f' {join} [{removed_level}]'
impacts_table = SimpleTable(
params_data, params_header, params_stubs,
txt_fmt=fmt_params, title=title)
return impacts_table
def summary_details(self, impact_date=None, impacted_variable=None,
update_date=None, updated_variable=None,
groupby='update date', sparsify=True,
float_format='%.2f', multiple_tables=False):
"""
Create summary table with detailed impacts from news; by date, variable
Parameters
----------
impact_date : int, str, datetime, list, array, or slice, optional
Observation index label or slice of labels specifying particular
impact periods to display. The impact date(s) describe the periods
in which impacted variables were *affected* by the news. If this
argument is given, the output table will only show this impact date
or dates. Note that this argument is passed to the Pandas `loc`
accessor, and so it should correspond to the labels of the model's
index. If the model was created with data in a list or numpy array,
then these labels will be zero-indexes observation integers.
impacted_variable : str, list, array, or slice, optional
Observation variable label or slice of labels specifying particular
impacted variables to display. The impacted variable(s) describe
the variables that were *affected* by the news. If you do not know
the labels for the variables, check the `endog_names` attribute of
the model instance.
update_date : int, str, datetime, list, array, or slice, optional
Observation index label or slice of labels specifying particular
updated periods to display. The updated date(s) describe the
periods in which the new data points were available that generated
the news). See the note on `impact_date` for details about what
these labels are.
updated_variable : str, list, array, or slice, optional
Observation variable label or slice of labels specifying particular
updated variables to display. The updated variable(s) describe the
variables that were *affected* by the news. If you do not know the
labels for the variables, check the `endog_names` attribute of the
model instance.
groupby : {update date, updated date, impact date, impacted date}
The primary variable for grouping results in the details table. The
default is to group by update date.
sparsify : bool, optional, default True
Set to False for the table to include every one of the multiindex
keys at each row.
float_format : str, optional
Formatter format string syntax for converting numbers to strings.
Default is '%.2f'.
multiple_tables : bool, optional
If set to True, this function will return a list of tables, one
table for each of the unique `groupby` levels. Default is False,
in which case this function returns a single table.
Returns
-------
details_table : SimpleTable or list of SimpleTable
Table or list of tables describing how the news from each update
(i.e. news from a particular variable / date) translates into
changes to the forecasts of each impacted variable variable / date.
This table contains information about the updates and about the
impacts. Updates are newly observed datapoints that were not
available in the previous results set. Each update leads to news,
and the news may cause changes in the forecasts of the impacted
variables. The amount that a particular piece of news (from an
update to some variable at some date) impacts a variable at some
date depends on weights that can be computed from the model
results.
The data contained in this table that refer to updates are:
- `update date` : The date at which a new datapoint was added.
- `updated variable` : The variable for which a new datapoint was
added.
- `forecast (prev)` : The value that had been forecast by the
previous model for the given updated variable and date.
- `observed` : The observed value of the new datapoint.
- `news` : The news is the difference between the observed value
and the previously forecast value for a given updated variable
and date.
The data contained in this table that refer to impacts are:
- `impact date` : A date associated with an impact.
- `impacted variable` : A variable that was impacted by the news.
- `weight` : The weight of news from a given `update date` and
`update variable` on a given `impacted variable` at a given
`impact date`.
- `impact` : The revision to the smoothed estimate / forecast of
the impacted variable at the impact date based specifically on
the news generated by the `updated variable` at the
`update date`.
See Also
--------
details_by_impact
details_by_update
"""
# Squeeze for univariate models
if self.updated.model.k_endog == 1:
if impacted_variable is None:
impacted_variable = self.updated.model.endog_names
if updated_variable is None:
updated_variable = self.updated.model.endog_names
# Select only the variables / dates of interest
s = list(np.s_[:, :, :, :, :, :])
if impact_date is not None:
s[0] = np.s_[impact_date]
if impacted_variable is not None:
s[1] = np.s_[impacted_variable]
if update_date is not None:
s[2] = np.s_[update_date]
if updated_variable is not None:
s[3] = np.s_[updated_variable]
s = tuple(s)
details = self.details_by_impact.loc[s, :]
# Make the first index level the groupby level
groupby = groupby.lower().replace('_', ' ')
groupby_overall = 'impact'
levels_order = [0, 1, 2, 3]
if groupby == 'update date':
levels_order = [2, 3, 0, 1]
groupby_overall = 'update'
elif groupby == 'updated variable':
levels_order = [3, 2, 1, 0]
groupby_overall = 'update'
elif groupby == 'impacted variable':
levels_order = [1, 0, 3, 2]
elif groupby != 'impact date':
raise ValueError('Invalid groupby for details table. Valid options'
' are "update date", "updated variable",'
' "impact date",or "impacted variable".'
f' Got "{groupby}".')
details.index = (details.index.reorder_levels(levels_order)
.remove_unused_levels())
details = details.sort_index()
# If our overall group-by is `update`, move forecast (prev) and
# observed into the index
base_levels = [0, 1, 2, 3]
if groupby_overall == 'update':
details.set_index(['observed', 'forecast (prev)'], append=True,
inplace=True)
details.index = details.index.reorder_levels([0, 1, 4, 5, 2, 3])
base_levels = [0, 1, 4, 5]
# Drop the non-groupby levels if there's only one value
tmp_index = details.index.remove_unused_levels()
n_levels = len(tmp_index.levels)
k_level_values = [len(tmp_index.levels[i]) for i in range(n_levels)]
removed_levels = []
if sparsify:
for i in sorted(base_levels)[::-1][:-1]:
if k_level_values[i] == 1:
name = tmp_index.names[i]
value = tmp_index.levels[i][0]
can_drop = (
(name == 'update date' and update_date is not None) or
(name == 'updated variable' and
updated_variable is not None) or
(name == 'impact date' and impact_date is not None) or
(name == 'impacted variable' and
(impacted_variable is not None or
self.impacted_variable is not None)))
if can_drop or not multiple_tables:
removed_levels.insert(0, f'{name} = {value}')
details.index = tmp_index = tmp_index.droplevel(i)
# Move everything to columns
details = details.reset_index()
# Function for formatting numbers
def str_format(num, mark_ones=False, mark_zeroes=False):
if pd.isnull(num):
out = ''
elif mark_ones and np.abs(1 - num) < self.tolerance:
out = '1.0'
elif mark_zeroes and np.abs(num) < self.tolerance:
out = '0'
else:
out = float_format % num
return out
# Function to create the table
def create_table(details, removed_levels):
# Convert everything to strings
for key in ['observed', 'forecast (prev)', 'news', 'weight',
'impact']:
if key in details:
args = (
# mark_ones
True if key in ['weight'] else False,
# mark_zeroes
True if key in ['weight', 'impact'] else False)
details[key] = details[key].apply(str_format, args=args)
for key in ['update date', 'impact date']:
if key in details:
details[key] = details[key].apply(str)
# Sparsify index columns
if sparsify:
sparsify_cols = ['update date', 'updated variable',
'impact date', 'impacted variable']
if groupby_overall == 'update':
sparsify_cols += ['observed', 'forecast (prev)']
for key in sparsify_cols:
if key in details:
mask = details[key] == details[key].shift(1)
details.loc[mask, key] = ''
params_data = details.values
params_header = details.columns.tolist()
params_stubs = None
title = 'Details'
if len(removed_levels):
title += ' for [' + ', '.join(removed_levels) + ']'
return SimpleTable(params_data, params_header, params_stubs,
txt_fmt=fmt_params, title=title)
if multiple_tables:
details_table = []
for item in details[groupby].unique():
mask = details[groupby] == item
item_details = details[mask].drop(groupby, axis=1)
item_removed_levels = [f'{groupby} = {item}'] + removed_levels
details_table.append(create_table(item_details,
item_removed_levels))
else:
details_table = create_table(details, removed_levels)
return details_table
def summary_revisions(self, sparsify=True, float_format='%.2f'):
    """
    Create summary table showing revisions to the previous results' data

    Parameters
    ----------
    sparsify : bool, optional, default True
        Set to False for the table to include every one of the multiindex
        keys at each row.
    float_format : str, optional
        Formatter format string syntax for converting numbers to strings.
        Default is '%.2f'.

    Returns
    -------
    revisions_table : SimpleTable
        Table showing revisions to the previous results' data. Columns are:

        - `revision date` : date associated with a revised data point
        - `revised variable` : variable that was revised at `revision date`
        - `observed (prev)` : the observed value prior to the revision
        - `revised` : the new value after the revision

    See Also
    --------
    data_revisions
    """
    data = self.data_revisions.sort_index().reset_index()

    def format_number(num):
        return '' if pd.isnull(num) else float_format % num

    # Convert every cell to text. Columns are replaced one at a time so
    # that the text never has to be written into a numeric / datetime
    # column in place.
    label_columns = ('revision date', 'revised variable')
    for column in data.columns:
        formatter = str if column in label_columns else format_number
        data[column] = data[column].map(formatter)

    # Sparsify the date column: blank out consecutive repeats
    if sparsify:
        repeated = data['revision date'] == data['revision date'].shift(1)
        data.loc[repeated, 'revision date'] = ''

    params_data = data.values
    params_header = data.columns.tolist()
    params_stubs = None

    title = 'Revisions to dataset:'
    revisions_table = SimpleTable(
        params_data, params_header, params_stubs,
        txt_fmt=fmt_params, title=title)

    return revisions_table
def summary_news(self, sparsify=True, float_format='%.2f'):
    """
    Create summary table showing news from new data since previous results

    Parameters
    ----------
    sparsify : bool, optional, default True
        Set to False for the table to include every one of the multiindex
        keys at each row.
    float_format : str, optional
        Formatter format string syntax for converting numbers to strings.
        Default is '%.2f'.

    Returns
    -------
    updates_table : SimpleTable
        Table showing new datapoints that were not in the previous results'
        data. Columns are:

        - `update date` : date associated with a new data point.
        - `updated variable` : variable for which new data was added at
          `update date`.
        - `observed` : the observed value of the new datapoint.
        - `forecast (prev)` : the forecast value for the updated variable
          at the update date in the previous results object (i.e. prior to
          the data being available).
        - `news` : the news associated with the new datapoint.

    See Also
    --------
    data_updates
    """
    # Attach the news to each update, matching on the shared index
    data = pd.merge(
        self.data_updates, self.news, left_index=True,
        right_index=True).sort_index().reset_index()

    def format_number(num):
        return '' if pd.isnull(num) else float_format % num

    # Convert every cell to text. Columns are replaced one at a time so
    # that the text never has to be written into a numeric / datetime
    # column in place.
    label_columns = ('update date', 'updated variable')
    for column in data.columns:
        formatter = str if column in label_columns else format_number
        data[column] = data[column].map(formatter)

    # Sparsify the date column: blank out consecutive repeats
    if sparsify:
        repeated = data['update date'] == data['update date'].shift(1)
        data.loc[repeated, 'update date'] = ''

    params_data = data.values
    params_header = data.columns.tolist()
    params_stubs = None

    title = 'News from updated observations:'
    updates_table = SimpleTable(
        params_data, params_header, params_stubs,
        txt_fmt=fmt_params, title=title)

    return updates_table
def summary(self, impact_date=None, impacted_variable=None,
            update_date=None, updated_variable=None,
            impacts_groupby='impact date', details_groupby='update date',
            show_revisions_columns=None, sparsify=True,
            include_details_tables=None, include_revisions_tables=False,
            float_format='%.2f'):
    """
    Create summary tables describing news and impacts

    Parameters
    ----------
    impact_date : int, str, datetime, list, array, or slice, optional
        Observation index label or slice of labels specifying particular
        impact periods to display. The impact date(s) describe the periods
        in which impacted variables were *affected* by the news. If this
        argument is given, the impact and details tables will only show
        this impact date or dates. Note that this argument is passed to the
        Pandas `loc` accessor, and so it should correspond to the labels of
        the model's index. If the model was created with data in a list or
        numpy array, then these labels will be zero-indexes observation
        integers.
    impacted_variable : str, list, array, or slice, optional
        Observation variable label or slice of labels specifying particular
        impacted variables to display. The impacted variable(s) describe
        the variables that were *affected* by the news. If you do not know
        the labels for the variables, check the `endog_names` attribute of
        the model instance.
    update_date : int, str, datetime, list, array, or slice, optional
        Observation index label or slice of labels specifying particular
        updated periods to display. The updated date(s) describe the
        periods in which the new data points were available (that generated
        the news). See the note on `impact_date` for details about what
        these labels are. Only affects the details tables; it is passed on
        to `summary_details` when given.
    updated_variable : str, list, array, or slice, optional
        Observation variable label or slice of labels specifying particular
        updated variables to display. The updated variable(s) describe the
        variables for which new data points were added. If you do not know
        the labels for the variables, check the `endog_names` attribute of
        the model instance. Only affects the details tables; it is passed
        on to `summary_details` when given.
    impacts_groupby : {impact date, impacted date}
        The primary variable for grouping results in the impacts table. The
        default is to group by impact date.
    details_groupby : str
        One of "update date", "updated date", "impact date", or
        "impacted date". The primary variable for grouping results in the
        details table. Only used if the details tables are included. The
        default is to group by update date.
    show_revisions_columns : bool, optional
        If set to False, the impacts table will not show the impacts from
        data revisions or the total impacts. Default is to show the
        revisions and totals columns if any revisions were made and
        otherwise to hide them.
    sparsify : bool, optional, default True
        Set to False for the table to include every one of the multiindex
        keys at each row.
    include_details_tables : bool, optional
        If set to True, the summary will show tables describing the details
        of how news from specific updates translate into specific impacts.
        These tables can be very long, particularly in cases where there
        were many updates and in multivariate models. The default is to
        show detailed tables only for univariate models.
    include_revisions_tables : bool, optional
        If set to True, the summary will show tables describing the
        revisions and updates that lead to impacts on variables of
        interest.
    float_format : str, optional
        Formatter format string syntax for converting numbers to strings.
        Default is '%.2f'.

    Returns
    -------
    summary_tables : Summary
        Summary tables describing news and impacts. Basic tables include:

        - A table with general information about the sample.
        - A table describing the impacts of revisions and news.
        - A table describing the news from new datapoints (only if there
          were any new datapoints).
        - Tables describing revisions in the dataset since the previous
          results set (unless `include_revisions_tables=False`).

        In univariate models or if `include_details_tables=True`, one or
        more tables will additionally be included describing the details
        of how news from specific updates translate into specific impacts.

    See Also
    --------
    summary_impacts
    summary_details
    summary_revisions
    summary_news
    """
    # Default for include_details_tables: only univariate models
    if include_details_tables is None:
        include_details_tables = self.updated.model.k_endog == 1

    # Model specification results
    model = self.model.model
    title = 'News'

    def get_sample(model):
        # Return (first label, last label, separator) for the model's
        # sample. The separator is kept apart from the end label so that
        # the bare end label can be reused without string slicing.
        if model._index_dates:
            ix = model._index
            return '%s' % ix[0], '%s' % ix[-1], '- '
        return str(0), str(model.nobs), ' - '

    previous_start, previous_end, previous_sep = get_sample(
        self.previous.model)
    updated_end = get_sample(self.updated.model)[1]

    # Standardize the model name as a list of str
    model_name = model.__class__.__name__

    # Top summary table
    top_left = [('Model:', [model_name]),
                ('Date:', None),
                ('Time:', None)]
    top_right = [
        ('Original sample:', [previous_start]),
        ('', [previous_sep + previous_end]),
        # Bare end label: no leftover separator whitespace for either the
        # date-indexed or the integer-indexed case
        ('Update through:', [updated_end]),
        ('No. Revisions:', [len(self.revisions_ix)]),
        ('No. New datapoints:', [len(self.updates_ix)])]

    summary = Summary()
    # NOTE(review): presumably `add_table_2cols` looks up `endog_names` on
    # `self.model`; the attribute is copied from the underlying model
    # before the call - confirm against `Summary.add_table_2cols`.
    self.model.endog_names = model.endog_names
    summary.add_table_2cols(self, gleft=top_left, gright=top_right,
                            title=title)
    table_ix = 1

    # Impact table
    summary.tables.insert(table_ix, self.summary_impacts(
        impact_date=impact_date, impacted_variable=impacted_variable,
        groupby=impacts_groupby,
        show_revisions_columns=show_revisions_columns, sparsify=sparsify,
        float_format=float_format))
    table_ix += 1

    # News table
    if len(self.updates_iloc) > 0:
        summary.tables.insert(
            table_ix, self.summary_news(sparsify=sparsify))
        table_ix += 1

    # Detail tables (only built when requested, since they can be long)
    if include_details_tables:
        multiple_tables = self.updated.model.k_endog > 1
        details_kwargs = dict(
            impact_date=impact_date, impacted_variable=impacted_variable,
            groupby=details_groupby, sparsify=sparsify,
            float_format=float_format, multiple_tables=multiple_tables)
        # Forward the update filters only when the caller supplied them, so
        # that the default call to `summary_details` is unchanged.
        # NOTE(review): assumes `summary_details` accepts `update_date` and
        # `updated_variable` keyword arguments - confirm its signature.
        if update_date is not None:
            details_kwargs['update_date'] = update_date
        if updated_variable is not None:
            details_kwargs['updated_variable'] = updated_variable

        details_tables = self.summary_details(**details_kwargs)
        if not multiple_tables:
            details_tables = [details_tables]

        for table in details_tables:
            summary.tables.insert(table_ix, table)
            table_ix += 1

    # Revisions
    if include_revisions_tables and len(self.revisions_iloc) > 0:
        summary.tables.insert(
            table_ix, self.summary_revisions(sparsify=sparsify))
        table_ix += 1

    return summary