# Source code for statsmodels.tsa.statespace.news

# -*- coding: utf-8 -*-
"""
News for state space models

Author: Chad Fulton
License: BSD-3
"""

import numpy as np
import pandas as pd

from statsmodels.iolib.table import SimpleTable
from statsmodels.iolib.summary import Summary
from statsmodels.iolib.tableformatting import fmt_params


class NewsResults(object):
    """
    Impacts of data revisions and news on estimates of variables of interest

    Parameters
    ----------
    news_results : SimpleNamespace instance
        Results from `KalmanSmoother.news`.
    model : MLEResults
        The results object associated with the model from which the NewsResults
        was generated.
    updated : MLEResults
        The results object associated with the model containing the updated
        dataset.
    previous : MLEResults
        The results object associated with the model containing the previous
        dataset.
    impacted_variable : str, list, array, or slice, optional
        Observation variable label or slice of labels specifying particular
        impacted variables to display in output. The impacted variable(s)
        describe the variables that were *affected* by the news. If you do not
        know the labels for the variables, check the `endog_names` attribute of
        the model instance.
    tolerance : float, optional
        The numerical threshold for determining zero impact. Default is that
        any impact less than 1e-10 is assumed to be zero.
    row_labels : iterable
        Row labels (often dates) for the impacts of the revisions and news.

    Attributes
    ----------
    total_impacts : pd.DataFrame
        Updates to forecasts of impacted variables from both news and data
        revisions, E[y^i | post] - E[y^i | previous]. Rows are labeled by
        `row_labels` and columns by the endogenous variable names.
    update_impacts : pd.DataFrame
        Updates to forecasts of impacted variables from the news,
        E[y^i | post] - E[y^i | revisions] where y^i are the impacted variables
        of interest.
    revision_impacts : pd.DataFrame
        Updates to forecasts of impacted variables from data revisions,
        E[y^i | revisions] - E[y^i | previous].
    news : pd.Series
        The unexpected component of the updated data,
        E[y^u | post] - E[y^u | revisions] where y^u are the updated variables.
        Indexed by (`update date`, `updated variable`).
    weights : pd.DataFrame
        Weights describing the effect of news on variables of interest. Rows
        are indexed by (`update date`, `updated variable`) and columns by
        (`impact date`, `impacted variable`).
    update_forecasts : pd.Series
        Forecasts based on the previous dataset of the variables that were
        updated, E[y^u | previous].
    update_realized : pd.Series
        Actual observed data associated with the variables that were
        updated, y^u
    prev_impacted_forecasts : pd.DataFrame
        Previous forecast of the variables of interest, E[y^i | previous].
    post_impacted_forecasts : pd.DataFrame
        Forecast of the variables of interest after taking into account both
        revisions and updates, E[y^i | post].
    revisions_iloc : pd.DataFrame
        The integer locations of the data revisions in the dataset.
    revisions_ix : pd.DataFrame
        The label-based locations of the data revisions in the dataset.
    updates_iloc : pd.DataFrame
        The integer locations of the updated data points.
    updates_ix : pd.DataFrame
        The label-based locations of updated data points.

    References
    ----------
    .. [1] Bańbura, Marta, and Michele Modugno.
           "Maximum likelihood estimation of factor models on datasets with
           arbitrary pattern of missing data."
           Journal of Applied Econometrics 29, no. 1 (2014): 133-160.
    .. [2] Bańbura, Marta, Domenico Giannone, and Lucrezia Reichlin.
           "Nowcasting."
           The Oxford Handbook of Economic Forecasting. July 8, 2011.
    .. [3] Bańbura, Marta, Domenico Giannone, Michele Modugno, and Lucrezia
           Reichlin.
           "Now-casting and the real-time data flow."
           In Handbook of economic forecasting, vol. 2, pp. 195-237.
           Elsevier, 2013.
    """
    def __init__(self, news_results, model, updated, previous,
                 impacted_variable=None, tolerance=1e-10, row_labels=None):
        # Note: `model` will be the same as one of `revised` or `previous`, but
        # we need to save it as self.model so that the `predict_dates`, which
        # were generated by the `_get_prediction_index` call, will be available
        # for use by the base wrapping code.
        self.model = model
        self.updated = updated
        self.previous = previous
        self.news_results = news_results
        self._impacted_variable = impacted_variable
        self._tolerance = tolerance
        self.row_labels = row_labels
        self.params = []  # required for `summary` to work

        columns = np.atleast_1d(self.updated.model.endog_names)

        # E[y^i | post]
        self.post_impacted_forecasts = pd.DataFrame(
            news_results.post_impacted_forecasts.T,
            index=self.row_labels, columns=columns)
        # E[y^i | previous]
        self.prev_impacted_forecasts = pd.DataFrame(
            news_results.prev_impacted_forecasts.T,
            index=self.row_labels, columns=columns)
        # E[y^i | post] - E[y^i | revisions]
        self.update_impacts = pd.DataFrame(
            news_results.update_impacts,
            index=self.row_labels, columns=columns)
        # E[y^i | revisions] - E[y^i | previous]
        self.revision_impacts = pd.DataFrame(
            news_results.revision_impacts,
            index=self.row_labels, columns=columns)
        # E[y^i | post] - E[y^i | previous]
        self.total_impacts = (self.post_impacted_forecasts -
                              self.prev_impacted_forecasts)

        # Indices of revisions and updates
        index = self.updated.model._index
        self.revisions_iloc = pd.DataFrame(
            list(zip(*news_results.revisions_ix)),
            index=['revision date', 'revised variable']).T
        iloc = self.revisions_iloc
        if len(iloc) > 0:
            self.revisions_ix = pd.DataFrame({
                'revision date': index[iloc['revision date']],
                'revised variable': columns[iloc['revised variable']]})
        else:
            self.revisions_ix = iloc.copy()

        self.updates_iloc = pd.DataFrame(
            list(zip(*news_results.updates_ix)),
            index=['update date', 'updated variable']).T
        iloc = self.updates_iloc
        if len(iloc) > 0:
            self.updates_ix = pd.DataFrame({
                'update date': index[iloc['update date']],
                'updated variable': columns[iloc['updated variable']]})
        else:
            self.updates_ix = iloc.copy()

        # Wrap forecasts and forecasts errors
        ix = pd.MultiIndex.from_arrays([self.updates_ix['update date'],
                                        self.updates_ix['updated variable']])

        # E[y^u | post] - E[y^u | previous]
        if news_results.news is None:
            self.news = pd.Series([], index=ix, name='news',
                                  dtype=model.params.dtype)
        else:
            self.news = pd.Series(news_results.news, index=ix, name='news')
        # E[y^u | previous]
        if news_results.update_forecasts is None:
            self.update_forecasts = pd.Series([], index=ix,
                                              dtype=model.params.dtype)
        else:
            self.update_forecasts = pd.Series(
                news_results.update_forecasts, index=ix)
        # y^u
        if news_results.update_realized is None:
            self.update_realized = pd.Series([], index=ix,
                                             dtype=model.params.dtype)
        else:
            self.update_realized = pd.Series(
                news_results.update_realized, index=ix)
        cols = pd.MultiIndex.from_product([self.row_labels, columns])
        # reshaped version of gain matrix E[y A'] E[A A']^{-1}
        if len(self.updates_iloc):
            weights = news_results.gain.transpose(0, 1, 2).reshape(
                len(cols), len(ix))
        else:
            weights = np.zeros((len(cols), len(ix)))
        self.weights = pd.DataFrame(weights, index=cols, columns=ix).T
        self.weights.columns.names = ['impact date', 'impacted variable']

    @property
    def impacted_variable(self):
        return self._impacted_variable

    @impacted_variable.setter
    def impacted_variable(self, value):
        self._impacted_variable = value

    @property
    def tolerance(self):
        return self._tolerance

    @tolerance.setter
    def tolerance(self, value):
        self._tolerance = value

    @property
    def data_revisions(self):
        """
        Revisions to data points that existed in the previous dataset

        Returns
        -------
        data_revisions : pd.DataFrame
            Index is as MultiIndex consisting of `revision date` and
            `revised variable`. The columns are:

            - `observed (prev)`: the value of the data as it was observed
              in the previous dataset.
            - `revised`: the revised value of the data, as it is observed
              in the new dataset

        See also
        --------
        data_updates
        """
        # Save revisions data
        data = self.revisions_ix.copy()
        data['observed (prev)'] = [
            self.previous.model.endog[row[0], row[1]]
            for _, row in self.revisions_iloc.iterrows()]
        data['revised'] = [
            self.updated.model.endog[row[0], row[1]]
            for _, row in self.revisions_iloc.iterrows()]
        data.index = pd.MultiIndex.from_arrays([data['revision date'],
                                                data['revised variable']])
        data = data.sort_index().drop(['revision date',
                                       'revised variable'], axis=1)
        return data

    @property
    def data_updates(self):
        """
        Updated data; new entries that did not exist in the previous dataset

        Returns
        -------
        data_updates : pd.DataFrame
            Index is as MultiIndex consisting of `update date` and
            `updated variable`. The columns are:

            - `forecast (prev)`: the previous forecast of the new entry,
              based on the information available in the previous dataset
              (recall that for these updated data points, the previous dataset
              had no observed value for them at all)
            - `observed`: the value of the new entry, as it is observed in the
              new dataset

        See also
        --------
        data_updates
        """
        data = pd.concat([self.update_realized, self.update_forecasts],
                         axis=1).sort_index().reset_index()
        data.columns = (data.columns[:2].tolist() +
                        ['observed', 'forecast (prev)'])
        data.index = pd.MultiIndex.from_arrays([data['update date'],
                                                data['updated variable']])
        data = data.sort_index().drop(['update date',
                                       'updated variable'], axis=1)
        return data

    @property
    def details_by_impact(self):
        """
        Details of forecast revisions from news, organized by impacts first

        Returns
        -------
        details : pd.DataFrame
            Index is a MultiIndex consisting of:

            - `impact date`: the date of the impact on the variable of interest
            - `impacted variable`: the variable that is being impacted
            - `update date`: the date of the data update, that results in
              `news` that impacts the forecast of variables of interest
            - `updated variable`: the variable being updated, that results in
              `news` that impacts the forecast of variables of interest

            The columns are:

            - `observed`: the value of the new entry, as it is observed in the
              new dataset
            - `forecast (prev)`: the previous forecast of the new entry,
              based on the information available in the previous dataset
            - `news`: the news associated with the update (this is just the
              forecast error: `observed` - `forecast (prev)`)
            - `weight`: the weight describing how the `news` affects the
              forecast of the variable of interest
            - `impact`: the impact of the `news` on the forecast of the
              variable of interest

        Notes
        -----
        This table decomposes updated forecasts of variables of interest from
        the `news` associated with each updated datapoint from the new data
        release.

        This table does not summarize the impacts or show the effect of
        revisions. That information can be found in the `impacts` table.

        Rows are restricted to `impacted_variable` when it is set, and rows
        whose absolute `weight` does not exceed `tolerance` are dropped.

        This form of the details table is organized so that the impacted
        dates / variables are first in the index. This is convenient for
        slicing by impacted variables / dates to view the details of data
        updates for a particular variable or date.

        However, since the `forecast (prev)` and `observed` columns have a lot
        of duplication, printing the entire table gives a result that is less
        easy to parse than that produced by the `details_by_update` property.
        `details_by_update` contains the same information but is organized to
        be more convenient for displaying the entire table of detailed updates.
        At the same time, `details_by_update` is less convenient for
        subsetting.

        See Also
        --------
        details_by_update
        impacts
        """
        # Move both column levels of `weights` into the index, giving one row
        # per combination of update (first two levels) and impact (last two)
        df = self.weights.stack(level=[0, 1]).rename('weight').to_frame()
        if len(self.updates_iloc):
            # NOTE(review): these Series are indexed by update only, while
            # `df` has four index levels; the assignment relies on pandas
            # aligning them on the shared update levels - confirm against the
            # supported pandas versions.
            df['forecast (prev)'] = self.update_forecasts
            df['observed'] = self.update_realized
            df['news'] = self.news
            df['impact'] = df['news'] * df['weight']
        else:
            # No updates: add the columns so the table keeps its layout
            df['forecast (prev)'] = []
            df['observed'] = []
            df['news'] = []
            df['impact'] = []
        df = df[['observed', 'forecast (prev)', 'news', 'weight', 'impact']]
        # Put the impact levels ahead of the update levels
        df = df.reorder_levels([2, 3, 0, 1]).sort_index()

        if self.impacted_variable is not None and len(df) > 0:
            df = df.loc[np.s_[:, self.impacted_variable], :]

        # Keep only rows whose weight is larger in magnitude than `tolerance`
        mask = np.abs(df['weight']) > self.tolerance
        return df[mask]

    @property
    def details_by_update(self):
        """
        Details of forecast revisions from news, organized by updates first

        Returns
        -------
        details : pd.DataFrame
            Index is a MultiIndex consisting of:

            - `update date`: the date of the data update, that results in
              `news` that impacts the forecast of variables of interest
            - `updated variable`: the variable being updated, that results in
              `news` that impacts the forecast of variables of interest
            - `observed`: the value of the new entry, as it is observed in the
              new dataset
            - `forecast (prev)`: the previous forecast of the new entry,
              based on the information available in the previous dataset
            - `impact date`: the date of the impact on the variable of interest
            - `impacted variable`: the variable that is being impacted

            The columns are:

            - `news`: the news associated with the update (this is just the
              forecast error: `observed` - `forecast (prev)`)
            - `weight`: the weight describing how the `news` affects the
              forecast of the variable of interest
            - `impact`: the impact of the `news` on the forecast of the
              variable of interest

        Notes
        -----
        This table decomposes updated forecasts of variables of interest from
        the `news` associated with each updated datapoint from the new data
        release.

        This table does not summarize the impacts or show the effect of
        revisions. That information can be found in the `impacts` table.

        Rows are restricted to `impacted_variable` when it is set, and rows
        whose absolute `weight` does not exceed `tolerance` are dropped.

        This form of the details table is organized so that the updated
        dates / variables are first in the index, and in this table the index
        also contains the forecasts and observed values of the updates. This is
        convenient for displaying the entire table of detailed updates because
        it allows sparsifying duplicate entries.

        However, since it includes forecasts and observed values in the index
        of the table, it is not convenient for subsetting by the variable of
        interest. Instead, the `details_by_impact` property is organized to
        make slicing by impacted variables / dates easy. This allows, for
        example, viewing the details of data updates on a particular variable
        or date of interest.

        See Also
        --------
        details_by_impact
        impacts
        """
        # Move both column levels of `weights` into the index, giving one row
        # per combination of update (first two levels) and impact (last two)
        df = self.weights.stack(level=[0, 1]).rename('weight').to_frame()
        if len(self.updates_iloc):
            # NOTE(review): these Series are indexed by update only, while
            # `df` has four index levels; the assignment relies on pandas
            # aligning them on the shared update levels - confirm against the
            # supported pandas versions.
            df['forecast (prev)'] = self.update_forecasts
            df['observed'] = self.update_realized
            df['news'] = self.news
            df['impact'] = df['news'] * df['weight']
        else:
            # No updates: add the columns so the table keeps its layout
            df['forecast (prev)'] = []
            df['observed'] = []
            df['news'] = []
            df['impact'] = []
        df = df[['forecast (prev)', 'observed', 'news',
                 'weight', 'impact']]
        # Flatten, then rebuild a six-level index that also carries the
        # observed value and previous forecast of each update
        df = df.reset_index()
        keys = ['update date', 'updated variable', 'observed',
                'forecast (prev)', 'impact date', 'impacted variable']
        df.index = pd.MultiIndex.from_arrays([df[key] for key in keys])
        details = df.drop(keys, axis=1).sort_index()

        # `df` and `details` have the same number of rows here
        if self.impacted_variable is not None and len(df) > 0:
            details = details.loc[
                np.s_[:, :, :, :, :, self.impacted_variable], :]

        # Keep only rows whose weight is larger in magnitude than `tolerance`
        mask = np.abs(details['weight']) > self.tolerance
        return details[mask]

    @property
    def impacts(self):
        """
        Impacts from news and revisions on all dates / variables of interest

        Returns
        -------
        impacts : pd.DataFrame
            Index is as MultiIndex consisting of:

            - `impact date`: the date of the impact on the variable of interest
            - `impacted variable`: the variable that is being impacted

            The columns are:

            - `estimate (prev)`: the previous estimate / forecast of the
              date / variable of interest.
            - `impact of revisions`: the impact of all data revisions on
              the estimate of the date / variable of interest.
            - `impact of news`: the impact of all news on the estimate of
              the date / variable of interest.
            - `total impact`: the total impact of both revisions and news on
              the estimate of the date / variable of interest.
            - `estimate (new)`: the new estimate / forecast of the
              date / variable of interest after taking into account the effects
              of the revisions and news.

        Notes
        -----
        This table decomposes updated forecasts of variables of interest into
        the overall effect from revisions and news.

        This table does not break down the detail by the updated
        dates / variables. That information can be found in the
        `details_by_impact` `details_by_update` tables.

        See Also
        --------
        details_by_impact
        details_by_update
        """
        # Summary of impacts
        impacts = pd.concat([
            self.prev_impacted_forecasts.unstack().rename('estimate (prev)'),
            self.revision_impacts.unstack().rename('impact of revisions'),
            self.update_impacts.unstack().rename('impact of news'),
            self.post_impacted_forecasts.unstack().rename('estimate (new)')],
            axis=1)
        impacts['impact of revisions'] = (
            impacts['impact of revisions'].fillna(0))
        impacts['impact of news'] = (
            impacts['impact of news'].fillna(0))
        impacts['total impact'] = (impacts['impact of revisions'] +
                                   impacts['impact of news'])
        impacts = impacts.reorder_levels([1, 0]).sort_index()
        impacts.index.names = ['impact date', 'impacted variable']
        impacts = impacts[['estimate (prev)', 'impact of revisions',
                           'impact of news', 'total impact', 'estimate (new)']]

        if self.impacted_variable is not None:
            impacts = impacts.loc[np.s_[:, self.impacted_variable], :]

        tmp = np.abs(impacts[['impact of revisions', 'impact of news']])
        mask = (tmp > self.tolerance).any(axis=1)

        return impacts[mask]

    def summary_impacts(self, impact_date=None, impacted_variable=None,
                        groupby='impact date', show_revisions_columns=None,
                        sparsify=True, float_format='%.2f'):
        """
        Create summary table with detailed impacts from news; by date, variable

        Parameters
        ----------
        impact_date : int, str, datetime, list, array, or slice, optional
            Observation index label or slice of labels specifying particular
            impact periods to display. The impact date(s) describe the periods
            in which impacted variables were *affected* by the news. If this
            argument is given, the output table will only show this impact date
            or dates. Note that this argument is passed to the Pandas `loc`
            accessor, and so it should correspond to the labels of the model's
            index. If the model was created with data in a list or numpy array,
            then these labels will be zero-indexes observation integers.
        impacted_variable : str, list, array, or slice, optional
            Observation variable label or slice of labels specifying particular
            impacted variables to display. The impacted variable(s) describe
            the variables that were *affected* by the news. If you do not know
            the labels for the variables, check the `endog_names` attribute of
            the model instance.
        groupby : {impact date, impacted date}
            The primary variable for grouping results in the impacts table. The
            default is to group by update date.
        show_revisions_columns : bool, optional
            If set to False, the impacts table will not show the impacts from
            data revisions or the total impacts. Default is to show the
            revisions and totals columns if any revisions were made and
            otherwise to hide them.
        sparsify : bool, optional, default True
            Set to False for the table to include every one of the multiindex
            keys at each row.
        float_format : str, optional
            Formatter format string syntax for converting numbers to strings.
            Default is '%.2f'.

        Returns
        -------
        impacts_table : SimpleTable
            Table describing total impacts from both revisions and news. See
            the documentation for the `impacts` attribute for more details
            about the index and columns.

        See Also
        --------
        impacts
        """
        # Squeeze for univariate models
        if impacted_variable is None and self.updated.model.k_endog == 1:
            impacted_variable = self.updated.model.endog_names

        # Default is to only show the revisions columns if there were any
        # revisions (otherwise it would just be a column of zeros)
        if show_revisions_columns is None:
            show_revisions_columns = len(self.revisions_iloc) > 0

        # Select only the variables / dates of interest
        s = list(np.s_[:, :])
        if impact_date is not None:
            s[0] = np.s_[impact_date]
        if impacted_variable is not None:
            s[1] = np.s_[impacted_variable]
        s = tuple(s)
        impacts = self.impacts.loc[s, :]

        # Make the first index level the groupby level
        groupby = groupby.lower()
        if groupby in ['impacted variable', 'impacted_variable']:
            impacts.index = impacts.index.swaplevel(1, 0)
        elif groupby not in ['impact date', 'impact_date']:
            raise ValueError('Invalid groupby for impacts table. Valid options'
                             ' are "impact date" or "impacted variable".'
                             f'Got "{groupby}".')
        impacts = impacts.sort_index()

        # Drop the non-groupby level if there's only one value
        tmp_index = impacts.index.remove_unused_levels()
        k_vars = len(tmp_index.levels[1])
        removed_level = None
        if sparsify and k_vars == 1:
            name = tmp_index.names[1]
            value = tmp_index.levels[1][0]
            removed_level = f'{name} = {value}'
            impacts.index = tmp_index.droplevel(1)
            impacts = impacts.applymap(
                lambda num: '' if pd.isnull(num) else float_format % num)
            impacts = impacts.reset_index()
            impacts.iloc[:, 0] = impacts.iloc[:, 0].map(str)
        else:
            impacts = impacts.reset_index()
            impacts.iloc[:, :2] = impacts.iloc[:, :2].applymap(str)
            impacts.iloc[:, 2:] = impacts.iloc[:, 2:].applymap(
                lambda num: '' if pd.isnull(num) else float_format % num)

        # Sparsify the groupby column
        if sparsify and groupby in impacts:
            mask = impacts[groupby] == impacts[groupby].shift(1)
            tmp = impacts.loc[mask, groupby]
            if len(tmp) > 0:
                impacts.loc[mask, groupby] = ''

        # Drop revisions and totals columns if applicable
        if not show_revisions_columns:
            impacts.drop(['impact of revisions', 'total impact'], axis=1,
                         inplace=True)

        params_data = impacts.values
        params_header = impacts.columns.tolist()
        params_stubs = None

        title = 'Impacts'
        if removed_level is not None:
            join = 'on' if groupby == 'date' else 'for'
            title += f' {join} [{removed_level}]'
        impacts_table = SimpleTable(
            params_data, params_header, params_stubs,
            txt_fmt=fmt_params, title=title)

        return impacts_table

    def summary_details(self, impact_date=None, impacted_variable=None,
                        update_date=None, updated_variable=None,
                        groupby='update date', sparsify=True,
                        float_format='%.2f', multiple_tables=False):
        """
        Create summary table with detailed impacts from news; by date, variable

        Parameters
        ----------
        impact_date : int, str, datetime, list, array, or slice, optional
            Observation index label or slice of labels specifying particular
            impact periods to display. The impact date(s) describe the periods
            in which impacted variables were *affected* by the news. If this
            argument is given, the output table will only show this impact date
            or dates. Note that this argument is passed to the Pandas `loc`
            accessor, and so it should correspond to the labels of the model's
            index. If the model was created with data in a list or numpy array,
            then these labels will be zero-indexes observation integers.
        impacted_variable : str, list, array, or slice, optional
            Observation variable label or slice of labels specifying particular
            impacted variables to display. The impacted variable(s) describe
            the variables that were *affected* by the news. If you do not know
            the labels for the variables, check the `endog_names` attribute of
            the model instance.
        update_date : int, str, datetime, list, array, or slice, optional
            Observation index label or slice of labels specifying particular
            updated periods to display. The updated date(s) describe the
            periods in which the new data points were available that generated
            the news). See the note on `impact_date` for details about what
            these labels are.
        updated_variable : str, list, array, or slice, optional
            Observation variable label or slice of labels specifying particular
            updated variables to display. The updated variable(s) describe the
            variables that were *affected* by the news. If you do not know the
            labels for the variables, check the `endog_names` attribute of the
            model instance.
        groupby : {update date, updated date, impact date, impacted date}
            The primary variable for grouping results in the details table. The
            default is to group by update date.
        sparsify : bool, optional, default True
            Set to False for the table to include every one of the multiindex
            keys at each row.
        float_format : str, optional
            Formatter format string syntax for converting numbers to strings.
            Default is '%.2f'.
        multiple_tables : bool, optional
            If set to True, this function will return a list of tables, one
            table for each of the unique `groupby` levels. Default is False,
            in which case this function returns a single table.

        Returns
        -------
        details_table : SimpleTable or list of SimpleTable
            Table or list of tables describing how the news from each update
            (i.e. news from a particular variable / date) translates into
            changes to the forecasts of each impacted variable variable / date.

            This table contains information about the updates and about the
            impacts. Updates are newly observed datapoints that were not
            available in the previous results set. Each update leads to news,
            and the news may cause changes in the forecasts of the impacted
            variables. The amount that a particular piece of news (from an
            update to some variable at some date) impacts a variable at some
            date depends on weights that can be computed from the model
            results.

            The data contained in this table that refer to updates are:

            - `update date` : The date at which a new datapoint was added.
            - `updated variable` : The variable for which a new datapoint was
              added.
            - `forecast (prev)` : The value that had been forecast by the
              previous model for the given updated variable and date.
            - `observed` : The observed value of the new datapoint.
            - `news` : The news is the difference between the observed value
              and the previously forecast value for a given updated variable
              and date.

            The data contained in this table that refer to impacts are:

            - `impact date` : A date associated with an impact.
            - `impacted variable` : A variable that was impacted by the news.
            - `weight` : The weight of news from a given `update date` and
              `update variable` on a given `impacted variable` at a given
              `impact date`.
            - `impact` : The revision to the smoothed estimate / forecast of
              the impacted variable at the impact date based specifically on
              the news generated by the `updated variable` at the
              `update date`.

        See Also
        --------
        details_by_impact
        details_by_update
        """
        # Squeeze for univariate models
        if self.updated.model.k_endog == 1:
            if impacted_variable is None:
                impacted_variable = self.updated.model.endog_names
            if updated_variable is None:
                updated_variable = self.updated.model.endog_names

        # Select only the variables / dates of interest
        s = list(np.s_[:, :, :, :, :, :])
        if impact_date is not None:
            s[0] = np.s_[impact_date]
        if impacted_variable is not None:
            s[1] = np.s_[impacted_variable]
        if update_date is not None:
            s[2] = np.s_[update_date]
        if updated_variable is not None:
            s[3] = np.s_[updated_variable]
        s = tuple(s)
        details = self.details_by_impact.loc[s, :]

        # Make the first index level the groupby level
        groupby = groupby.lower().replace('_', ' ')
        groupby_overall = 'impact'
        levels_order = [0, 1, 2, 3]
        if groupby == 'update date':
            levels_order = [2, 3, 0, 1]
            groupby_overall = 'update'
        elif groupby == 'updated variable':
            levels_order = [3, 2, 1, 0]
            groupby_overall = 'update'
        elif groupby == 'impacted variable':
            levels_order = [1, 0, 3, 2]
        elif groupby != 'impact date':
            raise ValueError('Invalid groupby for details table. Valid options'
                             ' are "update date", "updated variable",'
                             ' "impact date",or "impacted variable".'
                             f' Got "{groupby}".')
        details.index = (details.index.reorder_levels(levels_order)
                                      .remove_unused_levels())
        details = details.sort_index()

        # If our overall group-by is `update`, move forecast (prev) and
        # observed into the index
        base_levels = [0, 1, 2, 3]
        if groupby_overall == 'update':
            details.set_index(['observed', 'forecast (prev)'], append=True,
                              inplace=True)
            details.index = details.index.reorder_levels([0, 1, 4, 5, 2, 3])
            base_levels = [0, 1, 4, 5]

        # Drop the non-groupby levels if there's only one value
        tmp_index = details.index.remove_unused_levels()
        n_levels = len(tmp_index.levels)
        k_level_values = [len(tmp_index.levels[i]) for i in range(n_levels)]
        removed_levels = []
        if sparsify:
            for i in sorted(base_levels)[::-1][:-1]:
                if k_level_values[i] == 1:
                    name = tmp_index.names[i]
                    value = tmp_index.levels[i][0]
                    can_drop = (
                        (name == 'update date' and update_date is not None) or
                        (name == 'updated variable' and
                            updated_variable is not None) or
                        (name == 'impact date' and impact_date is not None) or
                        (name == 'impacted variable' and
                            (impacted_variable is not None or
                             self.impacted_variable is not None)))
                    if can_drop or not multiple_tables:
                        removed_levels.insert(0, f'{name} = {value}')
                        details.index = tmp_index = tmp_index.droplevel(i)

        # Move everything to columns
        details = details.reset_index()

        # Function for formatting numbers
        def str_format(num, mark_ones=False, mark_zeroes=False):
            if pd.isnull(num):
                out = ''
            elif mark_ones and np.abs(1 - num) < self.tolerance:
                out = '1.0'
            elif mark_zeroes and np.abs(num) < self.tolerance:
                out = '0'
            else:
                out = float_format % num
            return out

        # Function to create the table
        def create_table(details, removed_levels):
            # Convert everything to strings
            for key in ['observed', 'forecast (prev)', 'news', 'weight',
                        'impact']:
                if key in details:
                    args = (
                        # mark_ones
                        True if key in ['weight'] else False,
                        # mark_zeroes
                        True if key in ['weight', 'impact'] else False)
                    details[key] = details[key].apply(str_format, args=args)
            for key in ['update date', 'impact date']:
                if key in details:
                    details[key] = details[key].apply(str)

            # Sparsify index columns
            if sparsify:
                sparsify_cols = ['update date', 'updated variable',
                                 'impact date', 'impacted variable']
                if groupby_overall == 'update':
                    sparsify_cols += ['observed', 'forecast (prev)']

                for key in sparsify_cols:
                    if key in details:
                        mask = details[key] == details[key].shift(1)
                        details.loc[mask, key] = ''

            params_data = details.values
            params_header = details.columns.tolist()
            params_stubs = None

            title = 'Details'
            if len(removed_levels):
                title += ' for [' + ', '.join(removed_levels) + ']'
            return SimpleTable(params_data, params_header, params_stubs,
                               txt_fmt=fmt_params, title=title)

        if multiple_tables:
            details_table = []
            for item in details[groupby].unique():
                mask = details[groupby] == item
                item_details = details[mask].drop(groupby, axis=1)
                item_removed_levels = [f'{groupby} = {item}'] + removed_levels
                details_table.append(create_table(item_details,
                                                  item_removed_levels))
        else:
            details_table = create_table(details, removed_levels)

        return details_table

def summary_revisions(self, sparsify=True):
    """
    Create summary table showing revisions to the previous results' data

    Parameters
    ----------
    sparsify : bool, optional, default True
        Set to False for the table to include every one of the multiindex
        keys at each row.

    Returns
    -------
    revisions_table : SimpleTable
        Table showing revisions to the previous results' data. Columns are:

        - `revision date` : date associated with a revised data point
        - `revised variable` : variable that was revised at `revision date`
        - `observed (prev)` : the observed value prior to the revision
        - `revised` : the new value after the revision

    Notes
    -----
    The two key columns are converted with `str`. Every remaining column is
    formatted with two decimal places, and missing values are shown as
    empty strings.
    """
    def format_number(num):
        return '' if pd.isnull(num) else '%.2f' % num

    data = self.data_revisions.sort_index().reset_index()

    # Convert one column at a time and replace each column wholesale.
    # `DataFrame.applymap` is deprecated in recent pandas, and writing
    # strings into numeric columns through `.iloc[...] = ...` relies on
    # implicit dtype upcasting, which is deprecated as well.
    for key in ['revision date', 'revised variable']:
        data[key] = data[key].map(str)
    for key in data.columns[2:]:
        data[key] = data[key].map(format_number)

    # Sparsify the date column: blank out a date that repeats the date of
    # the row directly above it
    if sparsify:
        mask = data['revision date'] == data['revision date'].shift(1)
        data.loc[mask, 'revision date'] = ''

    params_data = data.values
    params_header = data.columns.tolist()
    params_stubs = None

    title = 'Revisions to dataset:'
    revisions_table = SimpleTable(
        params_data, params_header, params_stubs,
        txt_fmt=fmt_params, title=title)

    return revisions_table
def summary_news(self, sparsify=True):
    """
    Create summary table showing news from new data since previous results

    Parameters
    ----------
    sparsify : bool, optional, default True
        Set to False for the table to include every one of the multiindex
        keys at each row.

    Returns
    -------
    updates_table : SimpleTable
        Table showing new datapoints that were not in the previous results'
        data. Columns are:

        - `update date` : date associated with a new data point.
        - `updated variable` : variable for which new data was added at
          `update date`.
        - `forecast (prev)` : the forecast value for the updated variable
          at the update date in the previous results object (i.e. prior to
          the data being available).
        - `observed` : the observed value of the new datapoint.

        Any columns contributed by the `news` attribute are merged in
        alongside these (matched on the index).

    See Also
    --------
    data_updates

    Notes
    -----
    The two key columns are converted with `str`. Every remaining column is
    formatted with two decimal places, and missing values are shown as
    empty strings.
    """
    def format_number(num):
        return '' if pd.isnull(num) else '%.2f' % num

    data = pd.merge(
        self.data_updates, self.news, left_index=True,
        right_index=True).sort_index().reset_index()

    # Convert one column at a time and replace each column wholesale.
    # `DataFrame.applymap` is deprecated in recent pandas, and writing
    # strings into numeric columns through `.iloc[...] = ...` relies on
    # implicit dtype upcasting, which is deprecated as well.
    for key in ['update date', 'updated variable']:
        data[key] = data[key].map(str)
    for key in data.columns[2:]:
        data[key] = data[key].map(format_number)

    # Sparsify the date column: blank out a date that repeats the date of
    # the row directly above it
    if sparsify:
        mask = data['update date'] == data['update date'].shift(1)
        data.loc[mask, 'update date'] = ''

    params_data = data.values
    params_header = data.columns.tolist()
    params_stubs = None

    title = 'News from updated observations:'
    updates_table = SimpleTable(
        params_data, params_header, params_stubs,
        txt_fmt=fmt_params, title=title)

    return updates_table
def summary(self, impact_date=None, impacted_variable=None,
            update_date=None, updated_variable=None,
            impacts_groupby='impact date', details_groupby='update date',
            show_revisions_columns=None, sparsify=True,
            include_details_tables=None, include_revisions_tables=False,
            float_format='%.2f'):
    """
    Create summary tables describing news and impacts

    Parameters
    ----------
    impact_date : int, str, datetime, list, array, or slice, optional
        Observation index label or slice of labels specifying particular
        impact periods to display. The impact date(s) describe the periods
        in which impacted variables were *affected* by the news. If this
        argument is given, the impact and details tables will only show
        this impact date or dates. Note that this argument is passed to the
        Pandas `loc` accessor, and so it should correspond to the labels of
        the model's index. If the model was created with data in a list or
        numpy array, then these labels will be zero-indexes observation
        integers.
    impacted_variable : str, list, array, or slice, optional
        Observation variable label or slice of labels specifying particular
        impacted variables to display. The impacted variable(s) describe
        the variables that were *affected* by the news. If you do not know
        the labels for the variables, check the `endog_names` attribute of
        the model instance.
    update_date : int, str, datetime, list, array, or slice, optional
        Observation index label or slice of labels specifying particular
        updated periods to display. The updated date(s) describe the
        periods in which the new data points were available that generated
        the news. See the note on `impact_date` for details about what
        these labels are. Only the details tables are filtered by this
        argument.
    updated_variable : str, list, array, or slice, optional
        Observation variable label or slice of labels specifying particular
        updated variables to display. The updated variable(s) describe the
        variables for which new data points *generated* the news. If you do
        not know the labels for the variables, check the `endog_names`
        attribute of the model instance. Only the details tables are
        filtered by this argument.
    impacts_groupby : str
        The primary variable for grouping results in the impacts table;
        passed to `summary_impacts` as its `groupby` argument. The default
        is to group by impact date.
    details_groupby : str
        One of "update date", "updated variable", "impact date", or
        "impacted variable". The primary variable for grouping results in
        the details table. Only used if the details tables are included.
        The default is to group by update date.
    show_revisions_columns : bool, optional
        If set to False, the impacts table will not show the impacts from
        data revisions or the total impacts. Default is to show the
        revisions and totals columns if any revisions were made and
        otherwise to hide them.
    sparsify : bool, optional, default True
        Set to False for the table to include every one of the multiindex
        keys at each row.
    include_details_tables : bool, optional
        If set to True, the summary will show tables describing the details
        of how news from specific updates translate into specific impacts.
        These tables can be very long, particularly in cases where there
        were many updates and in multivariate models. The default is to
        show detailed tables only for univariate models.
    include_revisions_tables : bool, optional
        If set to True, the summary will show a table describing the
        revisions to the previous dataset (when there are any). Default is
        False.
    float_format : str, optional
        Formatter format string syntax for converting numbers to strings.
        Default is '%.2f'.

    Returns
    -------
    summary_tables : Summary
        Summary tables describing news and impacts. In order, the tables
        are:

        - A table with general information about the sample.
        - A table describing the impacts of revisions and news.
        - A table describing the news from new datapoints (only if there
          were any updates).
        - In univariate models or if `include_details_tables=True`, one or
          more tables describing the details of how news from specific
          updates translate into specific impacts.
        - A table describing revisions in the dataset since the previous
          results set (only if `include_revisions_tables=True` and there
          were any revisions).

    See Also
    --------
    summary_impacts
    summary_details
    summary_revisions
    summary_news
    """
    # Default for include_details_tables
    if include_details_tables is None:
        include_details_tables = self.updated.model.k_endog == 1

    # Model specification results
    model = self.model.model
    title = 'News'

    def get_sample(model):
        # Returns (first label, last label, separator). The separator
        # differs between the two cases, so the end label is returned
        # without it; this keeps callers from having to slice a prefix of
        # unknown width back off.
        if model._index_dates:
            ix = model._index
            return '%s' % ix[0], '%s' % ix[-1], '- '
        return str(0), str(model.nobs), ' - '

    previous_start, previous_end, previous_sep = get_sample(
        self.previous.model)
    _, updated_end, _ = get_sample(self.updated.model)

    # Standardize the model name as a list of str
    model_name = model.__class__.__name__

    # Top summary table
    top_left = [('Model:', [model_name]),
                ('Date:', None),
                ('Time:', None)]
    top_right = [
        ('Original sample:', [previous_start]),
        ('', [previous_sep + previous_end]),
        ('Update through:', [updated_end]),
        ('No. Revisions:', [len(self.revisions_ix)]),
        ('No. New datapoints:', [len(self.updates_ix)])]

    summary = Summary()
    # NOTE(review): `self` is handed to `add_table_2cols` as the results
    # object; presumably it looks up `self.model.endog_names`, which is why
    # the attribute is copied onto `self.model` here - confirm.
    self.model.endog_names = self.model.model.endog_names
    summary.add_table_2cols(self, gleft=top_left, gright=top_right,
                            title=title)
    table_ix = 1

    # Impact table
    summary.tables.insert(table_ix, self.summary_impacts(
        impact_date=impact_date, impacted_variable=impacted_variable,
        groupby=impacts_groupby,
        show_revisions_columns=show_revisions_columns, sparsify=sparsify,
        float_format=float_format))
    table_ix += 1

    # News table
    if len(self.updates_iloc) > 0:
        summary.tables.insert(
            table_ix, self.summary_news(sparsify=sparsify))
        table_ix += 1

    # Detail tables (only built when they will actually be shown)
    if include_details_tables:
        multiple_tables = self.updated.model.k_endog > 1
        details_tables = self.summary_details(
            impact_date=impact_date, impacted_variable=impacted_variable,
            update_date=update_date, updated_variable=updated_variable,
            groupby=details_groupby, sparsify=sparsify,
            float_format=float_format, multiple_tables=multiple_tables)
        # A single table is returned bare when `multiple_tables` is False
        if not multiple_tables:
            details_tables = [details_tables]

        for table in details_tables:
            summary.tables.insert(table_ix, table)
            table_ix += 1

    # Revisions
    if include_revisions_tables and len(self.revisions_iloc) > 0:
        summary.tables.insert(
            table_ix, self.summary_revisions(sparsify=sparsify))
        table_ix += 1

    return summary