Source code for statsmodels.tsa.statespace.news

# -*- coding: utf-8 -*-
"""
News for state space models

Author: Chad Fulton
License: BSD-3
"""

import numpy as np
import pandas as pd

from statsmodels.iolib.table import SimpleTable
from statsmodels.iolib.summary import Summary
from statsmodels.iolib.tableformatting import fmt_params


[docs]class NewsResults(object): """ Impacts of data revisions and news on estimates of variables of interest Parameters ---------- news_results : SimpleNamespace instance Results from `KalmanSmoother.news`. model : MLEResults The results object associated with the model from which the NewsResults was generated. updated : MLEResults The results object associated with the model containing the updated dataset. previous : MLEResults The results object associated with the model containing the previous dataset. impacted_variable : str, list, array, or slice, optional Observation variable label or slice of labels specifying particular impacted variables to display in output. The impacted variable(s) describe the variables that were *affected* by the news. If you do not know the labels for the variables, check the `endog_names` attribute of the model instance. tolerance : float, optional The numerical threshold for determining zero impact. Default is that any impact less than 1e-10 is assumed to be zero. row_labels : iterable Row labels (often dates) for the impacts of the revisions and news. Attributes ---------- total_impacts : pd.Series Updates to forecasts of impacted variables from both news and data revisions, E[y^i | post] - E[y^i | previous]. update_impacts : pd.Series Updates to forecasts of impacted variables from the news, E[y^i | post] - E[y^i | revisions] where y^i are the impacted variables of interest. revision_impacts : pd.Series Updates to forecasts of impacted variables from data revisions, E[y^i | revisions] - E[y^i | previous]. news : pd.Series The unexpected component of the updated data, E[y^u | post] - E[y^u | revisions] where y^u are the updated variables. weights : pd.Series Weights describing the effect of news on variables of interest. update_forecasts : pd.Series Forecasts based on the previous dataset of the variables that were updated, E[y^u | previous]. update_realized : pd.Series Actual observed data associated with the variables that were updated, y^u prev_impacted_forecasts : pd.Series Previous forecast of the variables of interest, E[y^i | previous]. post_impacted_forecasts : pd.Series Forecast of the variables of interest after taking into account both revisions and updates, E[y^i | post]. revisions_iloc : pd.DataFrame The integer locations of the data revisions in the dataset. revisions_ix : pd.DataFrame The label-based locations of the data revisions in the dataset. updates_iloc : pd.DataFrame The integer locations of the updated data points. updates_ix : pd.DataFrame The label-based locations of updated data points. References ---------- .. [1] Bańbura, Marta, and Michele Modugno. "Maximum likelihood estimation of factor models on datasets with arbitrary pattern of missing data." Journal of Applied Econometrics 29, no. 1 (2014): 133-160. .. [2] Bańbura, Marta, Domenico Giannone, and Lucrezia Reichlin. "Nowcasting." The Oxford Handbook of Economic Forecasting. July 8, 2011. .. [3] Bańbura, Marta, Domenico Giannone, Michele Modugno, and Lucrezia Reichlin. "Now-casting and the real-time data flow." In Handbook of economic forecasting, vol. 2, pp. 195-237. Elsevier, 2013. """ def __init__(self, news_results, model, updated, previous, impacted_variable=None, tolerance=1e-10, row_labels=None): # Note: `model` will be the same as one of `revised` or `previous`, but # we need to save it as self.model so that the `predict_dates`, which # were generated by the `_get_prediction_index` call, will be available # for use by the base wrapping code. self.model = model self.updated = updated self.previous = previous self.news_results = news_results self._impacted_variable = impacted_variable self._tolerance = tolerance self.row_labels = row_labels self.params = [] # required for `summary` to work columns = np.atleast_1d(self.updated.model.endog_names) # E[y^i | post] self.post_impacted_forecasts = pd.DataFrame( news_results.post_impacted_forecasts.T, index=self.row_labels, columns=columns) # E[y^i | previous] self.prev_impacted_forecasts = pd.DataFrame( news_results.prev_impacted_forecasts.T, index=self.row_labels, columns=columns) # E[y^i | post] - E[y^i | revisions] self.update_impacts = pd.DataFrame( news_results.update_impacts, index=self.row_labels, columns=columns) # E[y^i | revisions] - E[y^i | previous] self.revision_impacts = pd.DataFrame( news_results.revision_impacts, index=self.row_labels, columns=columns) # E[y^i | post] - E[y^i | previous] self.total_impacts = (self.post_impacted_forecasts - self.prev_impacted_forecasts) # Indices of revisions and updates index = self.updated.model._index self.revisions_iloc = pd.DataFrame( list(zip(*news_results.revisions_ix)), index=['revision date', 'revised variable']).T iloc = self.revisions_iloc if len(iloc) > 0: self.revisions_ix = pd.DataFrame({ 'revision date': index[iloc['revision date']], 'revised variable': columns[iloc['revised variable']]}) else: self.revisions_ix = iloc.copy() self.updates_iloc = pd.DataFrame( list(zip(*news_results.updates_ix)), index=['update date', 'updated variable']).T iloc = self.updates_iloc if len(iloc) > 0: self.updates_ix = pd.DataFrame({ 'update date': index[iloc['update date']], 'updated variable': columns[iloc['updated variable']]}) else: self.updates_ix = iloc.copy() # Wrap forecasts and forecasts errors ix = pd.MultiIndex.from_arrays([self.updates_ix['update date'], self.updates_ix['updated variable']]) # E[y^u | post] - E[y^u | previous] if news_results.news is None: self.news = pd.Series([], index=ix, name='news', dtype=model.params.dtype) else: self.news = pd.Series(news_results.news, index=ix, name='news') # E[y^u | previous] if news_results.update_forecasts is None: self.update_forecasts = pd.Series([], index=ix, dtype=model.params.dtype) else: self.update_forecasts = pd.Series( news_results.update_forecasts, index=ix) # y^u if news_results.update_realized is None: self.update_realized = pd.Series([], index=ix, dtype=model.params.dtype) else: self.update_realized = pd.Series( news_results.update_realized, index=ix) cols = pd.MultiIndex.from_product([self.row_labels, columns]) # reshaped version of gain matrix E[y A'] E[A A']^{-1} if len(self.updates_iloc): weights = news_results.gain.transpose(0, 1, 2).reshape( len(cols), len(ix)) else: weights = np.zeros((len(cols), len(ix))) self.weights = pd.DataFrame(weights, index=cols, columns=ix).T self.weights.columns.names = ['impact date', 'impacted variable'] @property def impacted_variable(self): return self._impacted_variable @impacted_variable.setter def impacted_variable(self, value): self._impacted_variable = value @property def tolerance(self): return self._tolerance @tolerance.setter def tolerance(self, value): self._tolerance = value @property def data_revisions(self): """ Revisions to data points that existed in the previous dataset Returns ------- data_revisions : pd.DataFrame Index is as MultiIndex consisting of `revision date` and `revised variable`. The columns are: - `observed (prev)`: the value of the data as it was observed in the previous dataset. - `revised`: the revised value of the data, as it is observed in the new dataset See also -------- data_updates """ # Save revisions data data = self.revisions_ix.copy() data['observed (prev)'] = [ self.previous.model.endog[row[0], row[1]] for _, row in self.revisions_iloc.iterrows()] data['revised'] = [ self.updated.model.endog[row[0], row[1]] for _, row in self.revisions_iloc.iterrows()] data.index = pd.MultiIndex.from_arrays([data['revision date'], data['revised variable']]) data = data.sort_index().drop(['revision date', 'revised variable'], axis=1) return data @property def data_updates(self): """ Updated data; new entries that did not exist in the previous dataset Returns ------- data_updates : pd.DataFrame Index is as MultiIndex consisting of `update date` and `updated variable`. The columns are: - `forecast (prev)`: the previous forecast of the new entry, based on the information available in the previous dataset (recall that for these updated data points, the previous dataset had no observed value for them at all) - `observed`: the value of the new entry, as it is observed in the new dataset See also -------- data_updates """ data = pd.concat([self.update_realized, self.update_forecasts], axis=1).sort_index().reset_index() data.columns = (data.columns[:2].tolist() + ['observed', 'forecast (prev)']) data.index = pd.MultiIndex.from_arrays([data['update date'], data['updated variable']]) data = data.sort_index().drop(['update date', 'updated variable'], axis=1) return data @property def details_by_impact(self): """ Details of forecast revisions from news, organized by impacts first Returns ------- details : pd.DataFrame Index is as MultiIndex consisting of: - `impact date`: the date of the impact on the variable of interest - `impacted variable`: the variable that is being impacted - `update date`: the date of the data update, that results in `news` that impacts the forecast of variables of interest - `updated variable`: the variable being updated, that results in `news` that impacts the forecast of variables of interest The columns are: - `forecast (prev)`: the previous forecast of the new entry, based on the information available in the previous dataset - `observed`: the value of the new entry, as it is observed in the new dataset - `news`: the news associated with the update (this is just the forecast error: `observed` - `forecast (prev)`) - `weight`: the weight describing how the `news` effects the forecast of the variable of interest - `impact`: the impact of the `news` on the forecast of the variable of interest Notes ----- This table decomposes updated forecasts of variables of interest from the `news` associated with each updated datapoint from the new data release. This table does not summarize the impacts or show the effect of revisions. That information can be found in the `impacts` table. This form of the details table is organized so that the impacted dates / variables are first in the index. This is convenient for slicing by impacted variables / dates to view the details of data updates for a particular variable or date. However, since the `forecast (prev)` and `observed` columns have a lot of duplication, printing the entire table gives a result that is less easy to parse than that produced by the `details_by_update` property. `details_by_update` contains the same information but is organized to be more convenient for displaying the entire table of detailed updates. At the same time, `details_by_update` is less convenient for subsetting. See Also -------- details_by_update impacts """ df = self.weights.stack(level=[0, 1]).rename('weight').to_frame() if len(self.updates_iloc): df['forecast (prev)'] = self.update_forecasts df['observed'] = self.update_realized df['news'] = self.news df['impact'] = df['news'] * df['weight'] else: df['forecast (prev)'] = [] df['observed'] = [] df['news'] = [] df['impact'] = [] df = df[['observed', 'forecast (prev)', 'news', 'weight', 'impact']] df = df.reorder_levels([2, 3, 0, 1]).sort_index() if self.impacted_variable is not None and len(df) > 0: df = df.loc[np.s_[:, self.impacted_variable], :] mask = np.abs(df['weight']) > self.tolerance return df[mask] @property def details_by_update(self): """ Details of forecast revisions from news, organized by updates first Returns ------- details : pd.DataFrame Index is as MultiIndex consisting of: - `update date`: the date of the data update, that results in `news` that impacts the forecast of variables of interest - `updated variable`: the variable being updated, that results in `news` that impacts the forecast of variables of interest - `forecast (prev)`: the previous forecast of the new entry, based on the information available in the previous dataset - `observed`: the value of the new entry, as it is observed in the new dataset - `impact date`: the date of the impact on the variable of interest - `impacted variable`: the variable that is being impacted The columns are: - `news`: the news associated with the update (this is just the forecast error: `observed` - `forecast (prev)`) - `weight`: the weight describing how the `news` effects the forecast of the variable of interest - `impact`: the impact of the `news` on the forecast of the variable of interest Notes ----- This table decomposes updated forecasts of variables of interest from the `news` associated with each updated datapoint from the new data release. This table does not summarize the impacts or show the effect of revisions. That information can be found in the `impacts` table. This form of the details table is organized so that the updated dates / variables are first in the index, and in this table the index also contains the forecasts and observed values of the updates. This is convenient for displaying the entire table of detailed updates because it allows sparsifying duplicate entries. However, since it includes forecasts and observed values in the index of the table, it is not convenient for subsetting by the variable of interest. Instead, the `details_by_impact` property is organized to make slicing by impacted variables / dates easy. This allows, for example, viewing the details of data updates on a particular variable or date of interest. See Also -------- details_by_impact impacts """ df = self.weights.stack(level=[0, 1]).rename('weight').to_frame() if len(self.updates_iloc): df['forecast (prev)'] = self.update_forecasts df['observed'] = self.update_realized df['news'] = self.news df['impact'] = df['news'] * df['weight'] else: df['forecast (prev)'] = [] df['observed'] = [] df['news'] = [] df['impact'] = [] df = df[['forecast (prev)', 'observed', 'news', 'weight', 'impact']] df = df.reset_index() keys = ['update date', 'updated variable', 'observed', 'forecast (prev)', 'impact date', 'impacted variable'] df.index = pd.MultiIndex.from_arrays([df[key] for key in keys]) details = df.drop(keys, axis=1).sort_index() if self.impacted_variable is not None and len(df) > 0: details = details.loc[ np.s_[:, :, :, :, :, self.impacted_variable], :] mask = np.abs(details['weight']) > self.tolerance return details[mask] @property def impacts(self): """ Impacts from news and revisions on all dates / variables of interest Returns ------- impacts : pd.DataFrame Index is as MultiIndex consisting of: - `impact date`: the date of the impact on the variable of interest - `impacted variable`: the variable that is being impacted The columns are: - `estimate (prev)`: the previous estimate / forecast of the date / variable of interest. - `impact of revisions`: the impact of all data revisions on the estimate of the date / variable of interest. - `impact of news`: the impact of all news on the estimate of the date / variable of interest. - `total impact`: the total impact of both revisions and news on the estimate of the date / variable of interest. - `estimate (new)`: the new estimate / forecast of the date / variable of interest after taking into account the effects of the revisions and news. Notes ----- This table decomposes updated forecasts of variables of interest into the overall effect from revisions and news. This table does not break down the detail by the updated dates / variables. That information can be found in the `details_by_impact` `details_by_update` tables. See Also -------- details_by_impact details_by_update """ # Summary of impacts impacts = pd.concat([ self.prev_impacted_forecasts.unstack().rename('estimate (prev)'), self.revision_impacts.unstack().rename('impact of revisions'), self.update_impacts.unstack().rename('impact of news'), self.post_impacted_forecasts.unstack().rename('estimate (new)')], axis=1) impacts['impact of revisions'] = ( impacts['impact of revisions'].fillna(0)) impacts['impact of news'] = ( impacts['impact of news'].fillna(0)) impacts['total impact'] = (impacts['impact of revisions'] + impacts['impact of news']) impacts = impacts.reorder_levels([1, 0]).sort_index() impacts.index.names = ['impact date', 'impacted variable'] impacts = impacts[['estimate (prev)', 'impact of revisions', 'impact of news', 'total impact', 'estimate (new)']] if self.impacted_variable is not None: impacts = impacts.loc[np.s_[:, self.impacted_variable], :] tmp = np.abs(impacts[['impact of revisions', 'impact of news']]) mask = (tmp > self.tolerance).any(axis=1) return impacts[mask]
[docs] def summary_impacts(self, impact_date=None, impacted_variable=None, groupby='impact date', show_revisions_columns=None, sparsify=True, float_format='%.2f'): """ Create summary table with detailed impacts from news; by date, variable Parameters ---------- impact_date : int, str, datetime, list, array, or slice, optional Observation index label or slice of labels specifying particular impact periods to display. The impact date(s) describe the periods in which impacted variables were *affected* by the news. If this argument is given, the output table will only show this impact date or dates. Note that this argument is passed to the Pandas `loc` accessor, and so it should correspond to the labels of the model's index. If the model was created with data in a list or numpy array, then these labels will be zero-indexes observation integers. impacted_variable : str, list, array, or slice, optional Observation variable label or slice of labels specifying particular impacted variables to display. The impacted variable(s) describe the variables that were *affected* by the news. If you do not know the labels for the variables, check the `endog_names` attribute of the model instance. groupby : {impact date, impacted date} The primary variable for grouping results in the impacts table. The default is to group by update date. show_revisions_columns : bool, optional If set to False, the impacts table will not show the impacts from data revisions or the total impacts. Default is to show the revisions and totals columns if any revisions were made and otherwise to hide them. sparsify : bool, optional, default True Set to False for the table to include every one of the multiindex keys at each row. float_format : str, optional Formatter format string syntax for converting numbers to strings. Default is '%.2f'. Returns ------- impacts_table : SimpleTable Table describing total impacts from both revisions and news. See the documentation for the `impacts` attribute for more details about the index and columns. See Also -------- impacts """ # Squeeze for univariate models if impacted_variable is None and self.updated.model.k_endog == 1: impacted_variable = self.updated.model.endog_names # Default is to only show the revisions columns if there were any # revisions (otherwise it would just be a column of zeros) if show_revisions_columns is None: show_revisions_columns = len(self.revisions_iloc) > 0 # Select only the variables / dates of interest s = list(np.s_[:, :]) if impact_date is not None: s[0] = np.s_[impact_date] if impacted_variable is not None: s[1] = np.s_[impacted_variable] s = tuple(s) impacts = self.impacts.loc[s, :] # Make the first index level the groupby level groupby = groupby.lower() if groupby in ['impacted variable', 'impacted_variable']: impacts.index = impacts.index.swaplevel(1, 0) elif groupby not in ['impact date', 'impact_date']: raise ValueError('Invalid groupby for impacts table. Valid options' ' are "impact date" or "impacted variable".' f'Got "{groupby}".') impacts = impacts.sort_index() # Drop the non-groupby level if there's only one value tmp_index = impacts.index.remove_unused_levels() k_vars = len(tmp_index.levels[1]) removed_level = None if sparsify and k_vars == 1: name = tmp_index.names[1] value = tmp_index.levels[1][0] removed_level = f'{name} = {value}' impacts.index = tmp_index.droplevel(1) impacts = impacts.applymap( lambda num: '' if pd.isnull(num) else float_format % num) impacts = impacts.reset_index() impacts.iloc[:, 0] = impacts.iloc[:, 0].map(str) else: impacts = impacts.reset_index() impacts.iloc[:, :2] = impacts.iloc[:, :2].applymap(str) impacts.iloc[:, 2:] = impacts.iloc[:, 2:].applymap( lambda num: '' if pd.isnull(num) else float_format % num) # Sparsify the groupby column if sparsify and groupby in impacts: mask = impacts[groupby] == impacts[groupby].shift(1) tmp = impacts.loc[mask, groupby] if len(tmp) > 0: impacts.loc[mask, groupby] = '' # Drop revisions and totals columns if applicable if not show_revisions_columns: impacts.drop(['impact of revisions', 'total impact'], axis=1, inplace=True) params_data = impacts.values params_header = impacts.columns.tolist() params_stubs = None title = 'Impacts' if removed_level is not None: join = 'on' if groupby == 'date' else 'for' title += f' {join} [{removed_level}]' impacts_table = SimpleTable( params_data, params_header, params_stubs, txt_fmt=fmt_params, title=title) return impacts_table
[docs] def summary_details(self, impact_date=None, impacted_variable=None, update_date=None, updated_variable=None, groupby='update date', sparsify=True, float_format='%.2f', multiple_tables=False): """ Create summary table with detailed impacts from news; by date, variable Parameters ---------- impact_date : int, str, datetime, list, array, or slice, optional Observation index label or slice of labels specifying particular impact periods to display. The impact date(s) describe the periods in which impacted variables were *affected* by the news. If this argument is given, the output table will only show this impact date or dates. Note that this argument is passed to the Pandas `loc` accessor, and so it should correspond to the labels of the model's index. If the model was created with data in a list or numpy array, then these labels will be zero-indexes observation integers. impacted_variable : str, list, array, or slice, optional Observation variable label or slice of labels specifying particular impacted variables to display. The impacted variable(s) describe the variables that were *affected* by the news. If you do not know the labels for the variables, check the `endog_names` attribute of the model instance. update_date : int, str, datetime, list, array, or slice, optional Observation index label or slice of labels specifying particular updated periods to display. The updated date(s) describe the periods in which the new data points were available that generated the news). See the note on `impact_date` for details about what these labels are. updated_variable : str, list, array, or slice, optional Observation variable label or slice of labels specifying particular updated variables to display. The updated variable(s) describe the variables that were *affected* by the news. If you do not know the labels for the variables, check the `endog_names` attribute of the model instance. groupby : {update date, updated date, impact date, impacted date} The primary variable for grouping results in the details table. The default is to group by update date. sparsify : bool, optional, default True Set to False for the table to include every one of the multiindex keys at each row. float_format : str, optional Formatter format string syntax for converting numbers to strings. Default is '%.2f'. multiple_tables : bool, optional If set to True, this function will return a list of tables, one table for each of the unique `groupby` levels. Default is False, in which case this function returns a single table. Returns ------- details_table : SimpleTable or list of SimpleTable Table or list of tables describing how the news from each update (i.e. news from a particular variable / date) translates into changes to the forecasts of each impacted variable variable / date. This table contains information about the updates and about the impacts. Updates are newly observed datapoints that were not available in the previous results set. Each update leads to news, and the news may cause changes in the forecasts of the impacted variables. The amount that a particular piece of news (from an update to some variable at some date) impacts a variable at some date depends on weights that can be computed from the model results. The data contained in this table that refer to updates are: - `update date` : The date at which a new datapoint was added. - `updated variable` : The variable for which a new datapoint was added. - `forecast (prev)` : The value that had been forecast by the previous model for the given updated variable and date. - `observed` : The observed value of the new datapoint. - `news` : The news is the difference between the observed value and the previously forecast value for a given updated variable and date. The data contained in this table that refer to impacts are: - `impact date` : A date associated with an impact. - `impacted variable` : A variable that was impacted by the news. - `weight` : The weight of news from a given `update date` and `update variable` on a given `impacted variable` at a given `impact date`. - `impact` : The revision to the smoothed estimate / forecast of the impacted variable at the impact date based specifically on the news generated by the `updated variable` at the `update date`. See Also -------- details_by_impact details_by_update """ # Squeeze for univariate models if self.updated.model.k_endog == 1: if impacted_variable is None: impacted_variable = self.updated.model.endog_names if updated_variable is None: updated_variable = self.updated.model.endog_names # Select only the variables / dates of interest s = list(np.s_[:, :, :, :, :, :]) if impact_date is not None: s[0] = np.s_[impact_date] if impacted_variable is not None: s[1] = np.s_[impacted_variable] if update_date is not None: s[2] = np.s_[update_date] if updated_variable is not None: s[3] = np.s_[updated_variable] s = tuple(s) details = self.details_by_impact.loc[s, :] # Make the first index level the groupby level groupby = groupby.lower().replace('_', ' ') groupby_overall = 'impact' levels_order = [0, 1, 2, 3] if groupby == 'update date': levels_order = [2, 3, 0, 1] groupby_overall = 'update' elif groupby == 'updated variable': levels_order = [3, 2, 1, 0] groupby_overall = 'update' elif groupby == 'impacted variable': levels_order = [1, 0, 3, 2] elif groupby != 'impact date': raise ValueError('Invalid groupby for details table. Valid options' ' are "update date", "updated variable",' ' "impact date",or "impacted variable".' f' Got "{groupby}".') details.index = (details.index.reorder_levels(levels_order) .remove_unused_levels()) details = details.sort_index() # If our overall group-by is `update`, move forecast (prev) and # observed into the index base_levels = [0, 1, 2, 3] if groupby_overall == 'update': details.set_index(['observed', 'forecast (prev)'], append=True, inplace=True) details.index = details.index.reorder_levels([0, 1, 4, 5, 2, 3]) base_levels = [0, 1, 4, 5] # Drop the non-groupby levels if there's only one value tmp_index = details.index.remove_unused_levels() n_levels = len(tmp_index.levels) k_level_values = [len(tmp_index.levels[i]) for i in range(n_levels)] removed_levels = [] if sparsify: for i in sorted(base_levels)[::-1][:-1]: if k_level_values[i] == 1: name = tmp_index.names[i] value = tmp_index.levels[i][0] can_drop = ( (name == 'update date' and update_date is not None) or (name == 'updated variable' and updated_variable is not None) or (name == 'impact date' and impact_date is not None) or (name == 'impacted variable' and (impacted_variable is not None or self.impacted_variable is not None))) if can_drop or not multiple_tables: removed_levels.insert(0, f'{name} = {value}') details.index = tmp_index = tmp_index.droplevel(i) # Move everything to columns details = details.reset_index() # Function for formatting numbers def str_format(num, mark_ones=False, mark_zeroes=False): if pd.isnull(num): out = '' elif mark_ones and np.abs(1 - num) < self.tolerance: out = '1.0' elif mark_zeroes and np.abs(num) < self.tolerance: out = '0' else: out = float_format % num return out # Function to create the table def create_table(details, removed_levels): # Convert everything to strings for key in ['observed', 'forecast (prev)', 'news', 'weight', 'impact']: if key in details: args = ( # mark_ones True if key in ['weight'] else False, # mark_zeroes True if key in ['weight', 'impact'] else False) details[key] = details[key].apply(str_format, args=args) for key in ['update date', 'impact date']: if key in details: details[key] = details[key].apply(str) # Sparsify index columns if sparsify: sparsify_cols = ['update date', 'updated variable', 'impact date', 'impacted variable'] if groupby_overall == 'update': sparsify_cols += ['observed', 'forecast (prev)'] for key in sparsify_cols: if key in details: mask = details[key] == details[key].shift(1) details.loc[mask, key] = '' params_data = details.values params_header = details.columns.tolist() params_stubs = None title = 'Details' if len(removed_levels): title += ' for [' + ', '.join(removed_levels) + ']' return SimpleTable(params_data, params_header, params_stubs, txt_fmt=fmt_params, title=title) if multiple_tables: details_table = [] for item in details[groupby].unique(): mask = details[groupby] == item item_details = details[mask].drop(groupby, axis=1) item_removed_levels = [f'{groupby} = {item}'] + removed_levels details_table.append(create_table(item_details, item_removed_levels)) else: details_table = create_table(details, removed_levels) return details_table
[docs] def summary_revisions(self, sparsify=True): """ Create summary table showing revisions to the previous results' data Parameters ---------- sparsify : bool, optional, default True Set to False for the table to include every one of the multiindex keys at each row. Returns ------- revisions_table : SimpleTable Table showing revisions to the previous results' data. Columns are: - `revision date` : date associated with a revised data point - `revised variable` : variable that was revised at `revision date` - `observed (prev)` : the observed value prior to the revision - `revised` : the new value after the revision """ data = self.data_revisions.sort_index().reset_index() data[['revision date', 'revised variable']] = ( data[['revision date', 'revised variable']].applymap(str)) data.iloc[:, 2:] = data.iloc[:, 2:].applymap( lambda num: '' if pd.isnull(num) else '%.2f' % num) # Sparsify the date column if sparsify: mask = data['revision date'] == data['revision date'].shift(1) data.loc[mask, 'revision date'] = '' params_data = data.values params_header = data.columns.tolist() params_stubs = None title = 'Revisions to dataset:' revisions_table = SimpleTable( params_data, params_header, params_stubs, txt_fmt=fmt_params, title=title) return revisions_table
[docs] def summary_news(self, sparsify=True): """ Create summary table showing news from new data since previous results Parameters ---------- sparsify : bool, optional, default True Set to False for the table to include every one of the multiindex keys at each row. Returns ------- updates_table : SimpleTable Table showing new datapoints that were not in the previous results' data. Columns are: - `update date` : date associated with a new data point. - `updated variable` : variable for which new data was added at `update date`. - `forecast (prev)` : the forecast value for the updated variable at the update date in the previous results object (i.e. prior to the data being available). - `observed` : the observed value of the new datapoint. See Also -------- data_updates """ data = pd.merge( self.data_updates, self.news, left_index=True, right_index=True).sort_index().reset_index() data[['update date', 'updated variable']] = ( data[['update date', 'updated variable']].applymap(str)) data.iloc[:, 2:] = data.iloc[:, 2:].applymap( lambda num: '' if pd.isnull(num) else '%.2f' % num) # Sparsify the date column if sparsify: mask = data['update date'] == data['update date'].shift(1) data.loc[mask, 'update date'] = '' params_data = data.values params_header = data.columns.tolist() params_stubs = None title = 'News from updated observations:' updates_table = SimpleTable( params_data, params_header, params_stubs, txt_fmt=fmt_params, title=title) return updates_table
[docs] def summary(self, impact_date=None, impacted_variable=None, update_date=None, updated_variable=None, impacts_groupby='impact date', details_groupby='update date', show_revisions_columns=None, sparsify=True, include_details_tables=None, include_revisions_tables=False, float_format='%.2f'): """ Create summary tables describing news and impacts Parameters ---------- impact_date : int, str, datetime, list, array, or slice, optional Observation index label or slice of labels specifying particular impact periods to display. The impact date(s) describe the periods in which impacted variables were *affected* by the news. If this argument is given, the impact and details tables will only show this impact date or dates. Note that this argument is passed to the Pandas `loc` accessor, and so it should correspond to the labels of the model's index. If the model was created with data in a list or numpy array, then these labels will be zero-indexes observation integers. impacted_variable : str, list, array, or slice, optional Observation variable label or slice of labels specifying particular impacted variables to display. The impacted variable(s) describe the variables that were *affected* by the news. If you do not know the labels for the variables, check the `endog_names` attribute of the model instance. update_date : int, str, datetime, list, array, or slice, optional Observation index label or slice of labels specifying particular updated periods to display. The updated date(s) describe the periods in which the new data points were available that generated the news). See the note on `impact_date` for details about what these labels are. updated_variable : str, list, array, or slice, optional Observation variable label or slice of labels specifying particular updated variables to display. The updated variable(s) describe the variables that were *affected* by the news. If you do not know the labels for the variables, check the `endog_names` attribute of the model instance. impacts_groupby : {impact date, impacted date} The primary variable for grouping results in the impacts table. The default is to group by update date. details_groupby : str One of "update date", "updated date", "impact date", or "impacted date". The primary variable for grouping results in the details table. Only used if the details tables are included. The default is to group by update date. show_revisions_columns : bool, optional If set to False, the impacts table will not show the impacts from data revisions or the total impacts. Default is to show the revisions and totals columns if any revisions were made and otherwise to hide them. sparsify : bool, optional, default True Set to False for the table to include every one of the multiindex keys at each row. include_details_tables : bool, optional If set to True, the summary will show tables describing the details of how news from specific updates translate into specific impacts. These tables can be very long, particularly in cases where there were many updates and in multivariate models. The default is to show detailed tables only for univariate models. include_revisions_tables : bool, optional If set to True, the summary will show tables describing the revisions and updates that lead to impacts on variables of interest. float_format : str, optional Formatter format string syntax for converting numbers to strings. Default is '%.2f'. Returns ------- summary_tables : Summary Summary tables describing news and impacts. Basic tables include: - A table with general information about the sample. - A table describing the impacts of revisions and news. - Tables describing revisions in the dataset since the previous results set (unless `include_revisions_tables=False`). In univariate models or if `include_details_tables=True`, one or more tables will additionally be included describing the details of how news from specific updates translate into specific impacts. See Also -------- summary_impacts summary_details summary_revisions summary_updates """ # Default for include_details_tables if include_details_tables is None: include_details_tables = self.updated.model.k_endog == 1 # Model specification results model = self.model.model title = 'News' def get_sample(model): if model._index_dates: ix = model._index d = ix[0] sample = ['%s' % d] d = ix[-1] sample += ['- ' + '%s' % d] else: sample = [str(0), ' - ' + str(model.nobs)] return sample previous_sample = get_sample(self.previous.model) revised_sample = get_sample(self.updated.model) # Standardize the model name as a list of str model_name = model.__class__.__name__ # Top summary table top_left = [('Model:', [model_name]), ('Date:', None), ('Time:', None)] top_right = [ ('Original sample:', [previous_sample[0]]), ('', [previous_sample[1]]), ('Update through:', [revised_sample[1][2:]]), ('No. Revisions:', [len(self.revisions_ix)]), ('No. New datapoints:', [len(self.updates_ix)])] summary = Summary() self.model.endog_names = self.model.model.endog_names summary.add_table_2cols(self, gleft=top_left, gright=top_right, title=title) table_ix = 1 # Impact table summary.tables.insert(table_ix, self.summary_impacts( impact_date=impact_date, impacted_variable=impacted_variable, groupby=impacts_groupby, show_revisions_columns=show_revisions_columns, sparsify=sparsify, float_format=float_format)) table_ix += 1 # News table if len(self.updates_iloc) > 0: summary.tables.insert( table_ix, self.summary_news(sparsify=sparsify)) table_ix += 1 # Detail tables multiple_tables = self.updated.model.k_endog > 1 details_tables = self.summary_details( impact_date=impact_date, impacted_variable=impacted_variable, groupby=details_groupby, sparsify=sparsify, float_format=float_format, multiple_tables=multiple_tables) if not multiple_tables: details_tables = [details_tables] if include_details_tables: for table in details_tables: summary.tables.insert(table_ix, table) table_ix += 1 # Revisions if include_revisions_tables and len(self.revisions_iloc) > 0: summary.tables.insert( table_ix, self.summary_revisions(sparsify=sparsify)) table_ix += 1 return summary