# Source code for reVX.utilities.forecasts

"""
Forecast Utilities
"""
import h5py
import logging
import numpy as np
import os
import shutil
from warnings import warn

from reVX.utilities.utilities import log_versions

from rex import Resource

# Module-level logger, named after this module, used by FcstUtils
logger = logging.getLogger(__name__)


class FcstUtils:
    """
    Forecast bias correcting and blending utilities
    """
    def __init__(self, fcst_h5, fcst_dset, actuals_h5=None, actuals_dset=None):
        """
        Parameters
        ----------
        fcst_h5 : str
            Path to forecast .h5 file
        fcst_dset : str
            Dataset to correct
        actuals_h5 : str, optional
            Path to actuals .h5 file, if None the forecast .h5 file is used,
            by default None
        actuals_dset : str, optional
            Actuals dataset, if None the forecast dataset name is used,
            by default None
        """
        log_versions(logger)
        self._fcst_h5 = fcst_h5
        self._fcst_dset = fcst_dset
        # Actuals default to the forecast file / dataset when not provided
        self._actuals_h5 = fcst_h5 if actuals_h5 is None else actuals_h5
        self._actuals_dset = (fcst_dset if actuals_dset is None
                              else actuals_dset)

        # Validate the datasets and determine how to index the actuals
        self._a_slice = self._preflight_check()

    @property
    def fcst_h5(self):
        """
        Path to forecast .h5 file

        Returns
        -------
        str
        """
        return self._fcst_h5

    @property
    def actuals_h5(self):
        """
        Path to actuals .h5 file

        Returns
        -------
        str
        """
        return self._actuals_h5

    @property
    def fcst_dset(self):
        """
        Forecast dset to correct

        Returns
        -------
        str
        """
        return self._fcst_dset

    @property
    def actuals_dset(self):
        """
        Actuals dset to use to correct forecasts

        Returns
        -------
        str
        """
        return self._actuals_dset

    @staticmethod
    def bias_correct_fcst(actuals, fcsts):
        """
        Bias correct forecast data using bias correction factor:
        total actual generation / total forecasted generation

        Corrected forecasts are capped at the maximum actual value of each
        site. Sites with a total forecast of zero have no defined correction
        factor and are left un-scaled (factor of 1) instead of being turned
        into NaN / inf values.

        Parameters
        ----------
        actuals : ndarray
            Timeseries actuals (time x sites)
        fcsts : ndarray
            Timeseries forecasts (time x sites)

        Returns
        -------
        fcsts : ndarray
            Bias corrected forecasts
        """
        actuals_total = actuals.sum(axis=0)
        fcsts_total = fcsts.sum(axis=0)
        # The division is undefined where the forecast total is zero;
        # silence the numpy warnings and replace those factors below
        with np.errstate(divide='ignore', invalid='ignore'):
            bc_factors = actuals_total / fcsts_total

        bc_factors = np.where(fcsts_total == 0, 1, bc_factors)

        fcsts = fcsts * bc_factors
        # Cap corrected forecasts at each site's maximum actual value
        actuals_max = actuals.max(axis=0)
        mask = fcsts >= actuals_max
        fcsts = np.where(mask, actuals_max, fcsts)

        return fcsts

    @classmethod
    def blend_fcsts(cls, actuals, fcsts, fcst_perc):
        """
        Bias correct and blend forecasts with actuals

        Parameters
        ----------
        actuals : ndarray
            Timeseries actuals (time x sites)
        fcsts : ndarray
            Timeseries forecasts (time x sites)
        fcst_perc : float
            Fraction of the (bias corrected) forecast to use for blending,
            the remaining (1 - fcst_perc) is taken from the actuals

        Returns
        -------
        fcsts : ndarray
            Bias corrected and blended forecasts
        """
        fcsts = cls.bias_correct_fcst(actuals, fcsts)
        fcsts = ((actuals * (1 - fcst_perc)) + (fcsts * fcst_perc))

        return fcsts

    @classmethod
    def _correct(cls, fcst, actuals, fcst_perc=None):
        """
        Correct given data using following steps:
        - Bias correct forecast data using bias correction factor:
        total actual generation / total forecasted generation
        - Blend fcst_perc of forecast generation with (1 - fcst_perc) of
        actuals generation (only if fcst_perc is given)

        The MAE before and after the correction is logged at debug level.

        Parameters
        ----------
        fcst : ndarray
            Timeseries forecasts (time x sites)
        actuals : ndarray
            Timeseries actuals (time x sites)
        fcst_perc : float, optional
            Fraction of forecast to use for blending, if None forecasts are
            only bias corrected, by default None

        Returns
        -------
        fcst : ndarray
            Corrected forecasts
        """
        mae = cls.compute_mae(actuals, fcst)
        logger.debug('Forecast agg MAE: {:.4f}, ave MAE {:.4f}'
                     .format(*mae))

        if fcst_perc is not None:
            logger.info('Bias correcting and blending forecasts with '
                        '{:}% actuals'.format((1 - fcst_perc) * 100))
            fcst = cls.blend_fcsts(actuals, fcst, fcst_perc)
        else:
            logger.info('Bias correcting forecasts')
            fcst = cls.bias_correct_fcst(actuals, fcst)

        mae = cls.compute_mae(actuals, fcst)
        logger.debug('Corrected forecast agg MAE: {:.4f}, '
                     'ave MAE {:.4f}'.format(*mae))

        return fcst

    @staticmethod
    def compute_mae(actuals, fcsts):
        """
        Compute aggregate and average normalized MAE between actuals and
        forecasts. Each site's MAE is normalized by that site's maximum
        actual value. Sites that cannot be normalized (maximum actual value
        of zero) are excluded from both statistics.

        Parameters
        ----------
        actuals : ndarray
            Timeseries actuals (time x sites)
        fcsts : ndarray
            Timeseries forecasts (time x sites)

        Returns
        -------
        agg_mae : float
            Aggregate (sum of) normalized MAE for all sites
        ave_mae : float
            Average normalized MAE for all sites
        """
        # Compute the difference in float64: integer inputs (e.g. raw data
        # read with unscale=False) would otherwise be subtracted in their
        # own dtype, which wraps around for unsigned integers
        errors = np.subtract(fcsts, actuals, dtype=np.float64)
        site_mae = np.mean(np.abs(errors), axis=0)

        actuals_max = np.max(actuals, axis=0)
        with np.errstate(divide='ignore', invalid='ignore'):
            site_rel_mae = site_mae / actuals_max

        # x / 0 gives inf which nansum / nanmean do not skip, flag as NaN
        site_rel_mae = np.where(actuals_max == 0, np.nan, site_rel_mae)
        ave_mae = np.nanmean(site_rel_mae)
        agg_mae = np.nansum(site_rel_mae)

        return agg_mae, ave_mae

    @staticmethod
    def _make_time_index_name(dset_name):
        """
        Make time_index name from dataset name. If the dataset name contains
        a '/', the text before the first '/' is used as the group prefix of
        the time_index.

        Parameters
        ----------
        dset_name : str
            profiles dataset

        Returns
        -------
        str
            time_index name associated with profiles dataset
        """
        time_index_name = 'time_index'
        if '/' in dset_name:
            group = dset_name.split('/')[0]
            time_index_name = "{}/{}".format(group, time_index_name)

        return time_index_name

    def _get_actuals_slice(self):
        """
        Create actuals slice needed to match fcst time-steps

        A strided slice is estimated from the ratio of actuals to forecast
        time-steps. If the forecast time_index has the expected length but
        does not equal the strided actuals time_index, the positions of the
        actuals time-steps that are present in the forecast time_index are
        returned instead.

        Returns
        -------
        a_slice : slice | ndarray
            Slice (or array of positions) of actuals timeseries needed to
            match forecast timeseries
        """
        with Resource(self.fcst_h5) as f:
            f_shape = f.get_dset_properties(self.fcst_dset)[0]
            f_time_index = f[self._make_time_index_name(self.fcst_dset)]

        with Resource(self.actuals_h5) as f:
            a_shape = f.get_dset_properties(self.actuals_dset)[0]
            a_time_index = f[self._make_time_index_name(self.actuals_dset)]

        # NOTE(review): the step is 0 if the forecast has more time-steps
        # than the actuals, which is not a valid slice - confirm that the
        # actuals are always at least as long as the forecasts
        a_slice = slice(None, None, a_shape[0] // f_shape[0])
        if len(f_time_index) != f_shape[0]:
            # The forecast time_index cannot be trusted, keep the estimate
            msg = ('Forecast time_index does not match forecast shape, '
                   'actuals slice will be estimated!')
            logger.warning(msg)
            warn(msg)
        elif not f_time_index.equals(a_time_index[a_slice]):
            a_slice = np.where(a_time_index.isin(f_time_index))[0]

        return a_slice

    def _preflight_check(self):
        """
        Check to ensure dset is available in forecast and actuals .h5 files
        and determine how the actuals have to be indexed to match the
        forecasts

        Returns
        -------
        a_slice : slice | ndarray
            Slice (or array of positions) of actuals timeseries needed to
            match forecast timeseries, slice(None) if the shapes match

        Raises
        ------
        RuntimeError
            If the forecast or actuals dataset is not in its .h5 file
        """
        with Resource(self.fcst_h5) as f:
            if self.fcst_dset not in f:
                msg = ('{} is not a valid dataset in forecast file: {}'
                       .format(self.fcst_dset, self.fcst_h5))
                logger.error(msg)
                raise RuntimeError(msg)

            f_shape = f.get_dset_properties(self.fcst_dset)[0]

        with Resource(self.actuals_h5) as f:
            if self.actuals_dset not in f:
                msg = ('{} is not a valid dataset in actuals file: {}'
                       .format(self.actuals_dset, self.actuals_h5))
                logger.error(msg)
                raise RuntimeError(msg)

            a_shape = f.get_dset_properties(self.actuals_dset)[0]
            a_slice = slice(None)
            if a_shape != f_shape:
                a_slice = self._get_actuals_slice()
                logger.debug('Extracting {} sub-slice of {} to match {} '
                             'shape: {}'.format(a_slice, self.actuals_dset,
                                                self.fcst_dset, f_shape))

        return a_slice

    def correct_dset(self, out_h5, fcst_perc=None):
        """
        Bias correct and blend (if requested) forecasts:
        - Bias correct forecast data using bias correction factor:
        total actual generation / total forecasted generation
        - Blend fcst_perc of forecast generation with (1 - fcst_perc) of
        actuals generation

        If out_h5 does not exist yet, the forecast .h5 file is copied to
        out_h5 first. The forecast dataset in out_h5 is then overwritten
        with the corrected values.

        Parameters
        ----------
        out_h5 : str
            Output path for corrected .h5 file
        fcst_perc : float, optional
            Fraction of forecast to use for blending, if None forecasts are
            only bias corrected, by default None
        """
        if not os.path.exists(out_h5):
            logger.debug('Copying forecasts ({}) to output path ({})'
                         .format(self.fcst_h5, out_h5))
            shutil.copy(self.fcst_h5, out_h5)

        with h5py.File(out_h5, 'a') as f_out:
            # Data is read with unscale=False and written back directly
            with Resource(self.fcst_h5, unscale=False) as f_fcst:
                fcst = f_fcst[self.fcst_dset]

            with Resource(self.actuals_h5, unscale=False) as f_act:
                actuals = f_act[self.actuals_dset, self._a_slice]

            logger.info('Correcting {} forecasts'.format(self.fcst_dset))
            f_out[self.fcst_dset][...] = self._correct(fcst, actuals,
                                                       fcst_perc=fcst_perc)

    @classmethod
    def correct(cls, fcst_h5, fcst_dset, out_h5,
                actuals_h5=None, actuals_dset=None, fcst_perc=None):
        """
        Bias correct and blend (if requested) forecasts using actuals:
        - Bias correct forecast data using bias correction factor:
        total actual generation / total forecasted generation
        - Blend fcst_perc of forecast generation with (1 - fcst_perc) of
        actuals generation

        Parameters
        ----------
        fcst_h5 : str
            Path to forecast .h5 file
        fcst_dset : str
            Dataset to correct
        out_h5 : str
            Output path for corrected .h5 file
        actuals_h5 : str, optional
            Path to actuals .h5 file, if None the forecast .h5 file is used,
            by default None
        actuals_dset : str, optional
            Actuals dataset, if None the forecast dataset name is used,
            by default None
        fcst_perc : float, optional
            Fraction of forecast to use for blending, if None forecasts are
            only bias corrected, by default None
        """
        fcst = cls(fcst_h5, fcst_dset, actuals_h5=actuals_h5,
                   actuals_dset=actuals_dset)
        fcst.correct_dset(out_h5, fcst_perc=fcst_perc)

    @classmethod
    def bias_correct(cls, fcst_h5, fcst_dset, out_h5,
                     actuals_h5=None, actuals_dset=None):
        """
        Bias correct forecast using actuals using bias correction factor:
        total actual generation / total forecasted generation

        Parameters
        ----------
        fcst_h5 : str
            Path to forecast .h5 file
        fcst_dset : str
            Dataset to correct
        out_h5 : str
            Output path for corrected .h5 file
        actuals_h5 : str, optional
            Path to actuals .h5 file, if None the forecast .h5 file is used,
            by default None
        actuals_dset : str, optional
            Actuals dataset, if None the forecast dataset name is used,
            by default None
        """
        cls.correct(fcst_h5, fcst_dset, out_h5,
                    actuals_h5=actuals_h5, actuals_dset=actuals_dset)

    @classmethod
    def blend(cls, fcst_h5, fcst_dset, out_h5, fcst_perc,
              actuals_h5=None, actuals_dset=None):
        """
        Bias correct and blend forecast using actuals:
        - Bias correct forecast data using bias correction factor:
        total actual generation / total forecasted generation
        - Blend fcst_perc of forecast generation with (1 - fcst_perc) of
        actuals generation

        Parameters
        ----------
        fcst_h5 : str
            Path to forecast .h5 file
        fcst_dset : str
            Dataset to correct
        out_h5 : str
            Output path for corrected .h5 file
        fcst_perc : float
            Fraction of forecast to use for blending
        actuals_h5 : str, optional
            Path to actuals .h5 file, if None the forecast .h5 file is used,
            by default None
        actuals_dset : str, optional
            Actuals dataset, if None the forecast dataset name is used,
            by default None
        """
        cls.correct(fcst_h5, fcst_dset, out_h5,
                    actuals_h5=actuals_h5, actuals_dset=actuals_dset,
                    fcst_perc=fcst_perc)