Source code for reVX.utilities.forecasts

"""
Forecast Utilities
"""
import h5py
import logging
import numpy as np
import os
import shutil
from warnings import warn

from reVX.utilities.utilities import log_versions

from rex import Resource

logger = logging.getLogger(__name__)


class FcstUtils:
    """
    Forecast bias correcting and blending utilities
    """

    def __init__(self, fcst_h5, fcst_dset, actuals_h5=None,
                 actuals_dset=None):
        """
        Parameters
        ----------
        fcst_h5 : str
            Path to forecast .h5 file
        fcst_dset : str
            Dataset to correct
        actuals_h5 : str, optional
            Path to actuals .h5 file, if None the forecast .h5 file is used,
            by default None
        actuals_dset : str, optional
            Actuals dataset, if None the forecast dataset name is used,
            by default None
        """
        log_versions(logger)
        self._fcst_h5 = fcst_h5
        if actuals_h5 is None:
            actuals_h5 = fcst_h5

        self._actuals_h5 = actuals_h5

        self._fcst_dset = fcst_dset
        if actuals_dset is None:
            actuals_dset = fcst_dset

        self._actuals_dset = actuals_dset

        # Validates both datasets exist and caches the actuals time selector
        self._a_slice = self._preflight_check()

    @property
    def fcst_h5(self):
        """
        Path to forecast .h5 file

        Returns
        -------
        str
        """
        return self._fcst_h5

    @property
    def actuals_h5(self):
        """
        Path to actuals .h5 file

        Returns
        -------
        str
        """
        return self._actuals_h5

    @property
    def fcst_dset(self):
        """
        Forecast dset to correct

        Returns
        -------
        str
        """
        return self._fcst_dset

    @property
    def actuals_dset(self):
        """
        Actuals dset to use to correct forecasts

        Returns
        -------
        str
        """
        return self._actuals_dset

    @staticmethod
    def bias_correct_fcst(actuals, fcsts):
        """
        Bias correct forecast data using bias correction factor:
        total actual generation / total forecasted generation

        Sites whose total forecasted generation is zero cannot be scaled and
        are left uncorrected (factor of 1) instead of becoming NaN/inf.
        Corrected forecasts are capped at each site's maximum actual value.

        Parameters
        ----------
        actuals : ndarray
            Timeseries actuals (time x sites)
        fcsts : ndarray
            Timeseries forecasts (time x sites)

        Returns
        -------
        fcsts : ndarray
            Bias corrected forecasts
        """
        actual_totals = actuals.sum(axis=0)
        fcst_totals = fcsts.sum(axis=0)

        # The division is evaluated for every site; silence the warnings for
        # zero totals and then replace those factors with 1 (no correction)
        with np.errstate(divide='ignore', invalid='ignore'):
            bc_factors = actual_totals / fcst_totals

        bc_factors = np.where(fcst_totals != 0, bc_factors, 1)
        fcsts = fcsts * bc_factors

        # Cap corrected forecasts at the per-site maximum of the actuals
        actuals_max = actuals.max(axis=0)
        mask = fcsts >= actuals_max
        fcsts = np.where(mask, actuals_max, fcsts)

        return fcsts

    @classmethod
    def blend_fcsts(cls, actuals, fcsts, fcst_perc):
        """
        Bias correct and blend forecasts with actuals

        Parameters
        ----------
        actuals : ndarray
            Timeseries actuals (time x sites)
        fcsts : ndarray
            Timeseries forecasts (time x sites)
        fcst_perc : float
            Weight given to the bias corrected forecast when blending,
            actuals are weighted by (1 - fcst_perc)

        Returns
        -------
        fcsts : ndarray
            Bias corrected and blended forecasts
        """
        fcsts = cls.bias_correct_fcst(actuals, fcsts)
        fcsts = ((actuals * (1 - fcst_perc)) + (fcsts * fcst_perc))

        return fcsts

    @classmethod
    def _correct(cls, fcst, actuals, fcst_perc=None):
        """
        Correct given data using following steps:
        - Bias correct forecast data using bias correction factor:
        total actual generation / total forecasted generation
        - Blend fcst_perc of forecast generation with (1 - fcst_perc) of
        actuals generation

        Parameters
        ----------
        fcst : ndarray
            Timeseries forecasts (time x sites)
        actuals : ndarray
            Timeseries actuals (time x sites)
        fcst_perc : float, optional
            Weight given to the forecast when blending, if None the forecast
            is only bias corrected, by default None

        Returns
        -------
        fcst : ndarray
            Corrected forecasts
        """
        mae = cls.compute_mae(actuals, fcst)
        logger.debug('Forecast agg MAE: {:.4f}, ave MAE {:.4f}'
                     .format(*mae))

        if fcst_perc is not None:
            logger.info('Bias correcting and blending forecasts with '
                        '{:}% actuals'.format((1 - fcst_perc) * 100))
            fcst = cls.blend_fcsts(actuals, fcst, fcst_perc)
        else:
            logger.info('Bias correcting forecasts')
            fcst = cls.bias_correct_fcst(actuals, fcst)

        mae = cls.compute_mae(actuals, fcst)
        logger.debug('Corrected forecast agg MAE: {:.4f}, '
                     'ave MAE {:.4f}'.format(*mae))

        return fcst

    @staticmethod
    def compute_mae(actuals, fcsts):
        """
        Compute aggregate and average normalized MAE between actuals and
        forecasts. Each site's MAE is normalized by that site's maximum
        actual value.

        Parameters
        ----------
        actuals : ndarray
            Timeseries actuals (time x sites)
        fcsts : ndarray
            Timeseries forecasts (time x sites)

        Returns
        -------
        agg_mae : float
            Sum of the normalized MAE over all sites (NaNs ignored)
        ave_mae : float
            Mean of the normalized MAE over all sites (NaNs ignored)
        """
        # Subtract in float64 so unsigned integer inputs cannot wrap around
        # when the forecast is smaller than the actual
        abs_err = np.abs(np.subtract(fcsts, actuals, dtype=np.float64))
        site_mae = np.mean(abs_err, axis=0)
        site_rel_mae = site_mae / np.max(actuals, axis=0)
        ave_mae = np.nanmean(site_rel_mae)
        agg_mae = np.nansum(site_rel_mae)

        return agg_mae, ave_mae

    @staticmethod
    def _make_time_index_name(dset_name):
        """
        Make time_index name from dataset name

        Parameters
        ----------
        dset_name : str
            profiles dataset

        Returns
        -------
        str
            time_index name associated with profiles dataset, prefixed with
            the dataset's leading group when dset_name contains a '/'
        """
        time_index_name = 'time_index'
        if '/' in dset_name:
            group = dset_name.split('/')[0]
            time_index_name = "{}/{}".format(group, time_index_name)

        return time_index_name

    def _get_actuals_slice(self):
        """
        Create actuals slice needed to match fcst time-steps

        Returns
        -------
        a_slice : slice | ndarray
            Selector for the actuals timeseries needed to match the forecast
            timeseries. A strided slice when the time-steps line up (or when
            the forecast time_index cannot be trusted), otherwise the
            positions of the actuals time-steps found in the forecast
            time_index
        """
        with Resource(self.fcst_h5) as f:
            f_shape = f.get_dset_properties(self.fcst_dset)[0]
            f_time_index = f[self._make_time_index_name(self.fcst_dset)]

        with Resource(self.actuals_h5) as f:
            a_shape = f.get_dset_properties(self.actuals_dset)[0]
            a_time_index = f[self._make_time_index_name(self.actuals_dset)]

        # First guess: actuals are a regular multiple of the forecast length
        a_slice = slice(None, None, a_shape[0] // f_shape[0])
        if len(f_time_index) != f_shape[0]:
            msg = ('Forecast time_index does not match forecast shape, '
                   'actuals slice will be estimated!')
            logger.warning(msg)
            warn(msg)
        elif not f_time_index.equals(a_time_index[a_slice]):
            # Strided guess is wrong, match time-steps explicitly
            a_slice = np.where(a_time_index.isin(f_time_index))[0]

        return a_slice

    def _preflight_check(self):
        """
        Check to ensure dset is available in forecast and actuals .h5 files

        Returns
        -------
        a_slice : slice | ndarray
            Selector for the actuals timeseries needed to match the forecast
            timeseries, slice(None) when the dataset shapes already match

        Raises
        ------
        RuntimeError
            If the forecast or actuals dataset is missing from its file
        """
        with Resource(self.fcst_h5) as f:
            if self.fcst_dset not in f:
                msg = ('{} is not a valid dataset in forecast file: {}'
                       .format(self.fcst_dset, self.fcst_h5))
                logger.error(msg)
                raise RuntimeError(msg)
            else:
                f_shape = f.get_dset_properties(self.fcst_dset)[0]

        with Resource(self.actuals_h5) as f:
            if self.actuals_dset not in f:
                msg = ('{} is not a valid dataset in actuals file: {}'
                       .format(self.actuals_dset, self.actuals_h5))
                logger.error(msg)
                raise RuntimeError(msg)
            else:
                a_shape = f.get_dset_properties(self.actuals_dset)[0]

        a_slice = slice(None)
        if a_shape != f_shape:
            a_slice = self._get_actuals_slice()
            logger.debug('Extracting {} sub-slice of {} to match {} '
                         'shape: {}'.format(a_slice, self.actuals_dset,
                                            self.fcst_dset, f_shape))

        return a_slice

    def correct_dset(self, out_h5, fcst_perc=None):
        """
        Bias correct and blend (if requested) forecasts:
        - Bias correct forecast data using bias correction factor:
        total actual generation / total forecasted generation
        - Blend fcst_perc of forecast generation with (1 - fcst_perc) of
        actuals generation

        Parameters
        ----------
        out_h5 : str
            Output path for corrected .h5 file, created as a copy of the
            forecast .h5 file if it does not already exist
        fcst_perc : float, optional
            Weight given to the forecast when blending, if None the forecast
            is only bias corrected, by default None
        """
        if not os.path.exists(out_h5):
            logger.debug('Copying forecasts ({}) to output path ({})'
                         .format(self.fcst_h5, out_h5))
            shutil.copy(self.fcst_h5, out_h5)

        with h5py.File(out_h5, 'a') as f_out:
            with Resource(self.fcst_h5, unscale=False) as f_fcst:
                fcst = f_fcst[self.fcst_dset]

            with Resource(self.actuals_h5, unscale=False) as f_act:
                actuals = f_act[self.actuals_dset, self._a_slice]

            logger.info('Correcting {} forecasts'.format(self.fcst_dset))
            # Overwrite the copied forecast dataset in place
            f_out[self.fcst_dset][...] = self._correct(fcst, actuals,
                                                       fcst_perc=fcst_perc)

    @classmethod
    def correct(cls, fcst_h5, fcst_dset, out_h5, actuals_h5=None,
                actuals_dset=None, fcst_perc=None):
        """
        Bias correct and blend (if requested) forecasts using actuals:
        - Bias correct forecast data using bias correction factor:
        total actual generation / total forecasted generation
        - Blend fcst_perc of forecast generation with (1 - fcst_perc) of
        actuals generation

        Parameters
        ----------
        fcst_h5 : str
            Path to forecast .h5 file
        fcst_dset : str
            Dataset to correct
        out_h5 : str
            Output path for corrected .h5 file
        actuals_h5 : str, optional
            Path to actuals .h5 file, by default None
        actuals_dset : str, optional
            Actuals dataset, by default None
        fcst_perc : float, optional
            Weight given to the forecast when blending, if None the forecast
            is only bias corrected, by default None
        """
        fcst = cls(fcst_h5, fcst_dset, actuals_h5=actuals_h5,
                   actuals_dset=actuals_dset)
        fcst.correct_dset(out_h5, fcst_perc=fcst_perc)

    @classmethod
    def bias_correct(cls, fcst_h5, fcst_dset, out_h5, actuals_h5=None,
                     actuals_dset=None):
        """
        Bias correct forecast using actuals using bias correction factor:
        total actual generation / total forecasted generation

        Parameters
        ----------
        fcst_h5 : str
            Path to forecast .h5 file
        fcst_dset : str
            Dataset to correct
        out_h5 : str
            Output path for corrected .h5 file
        actuals_h5 : str, optional
            Path to actuals .h5 file, by default None
        actuals_dset : str, optional
            Actuals dataset, by default None
        """
        cls.correct(fcst_h5, fcst_dset, out_h5, actuals_h5=actuals_h5,
                    actuals_dset=actuals_dset)

    @classmethod
    def blend(cls, fcst_h5, fcst_dset, out_h5, fcst_perc,
              actuals_h5=None, actuals_dset=None):
        """
        Bias correct and blend forecast using actuals:
        - Bias correct forecast data using bias correction factor:
        total actual generation / total forecasted generation
        - Blend fcst_perc of forecast generation with (1 - fcst_perc) of
        actuals generation

        Parameters
        ----------
        fcst_h5 : str
            Path to forecast .h5 file
        fcst_dset : str
            Dataset to correct
        out_h5 : str
            Output path for corrected .h5 file
        fcst_perc : float
            Weight given to the forecast when blending
        actuals_h5 : str, optional
            Path to actuals .h5 file, by default None
        actuals_dset : str, optional
            Actuals dataset, by default None
        """
        cls.correct(fcst_h5, fcst_dset, out_h5, actuals_h5=actuals_h5,
                    actuals_dset=actuals_dset, fcst_perc=fcst_perc)