Source code for rex.utilities.bc_utils

# -*- coding: utf-8 -*-
"""
rex bias correction utilities.
"""
import scipy
import numpy as np
import logging


logger = logging.getLogger(__name__)


[docs] def sample_q_linear(n_samples): """Sample quantiles from 0 to 1 inclusive linearly with even spacing Parameters ---------- n_samples : int Number of points to sample between 0 and 1 Returns ------- quantiles : np.ndarray 1D array of evenly spaced samples from 0 to 1 """ quantiles = np.linspace(0, 1, n_samples) return quantiles
[docs] def sample_q_log(n_samples, log_base): """Sample quantiles from 0 to 1 while concentrating samples near quantile=0 Parameters ---------- n_samples : int Number of points to sample between 0 and 1 log_base : int | float Log base value. A higher value will concentrate more samples at the extreme sides of the distribution. Returns ------- quantiles : np.ndarray 1D array of log-spaced samples from 0 to 1 """ quantiles = np.logspace(0, 1, n_samples, base=log_base) quantiles = (quantiles - 1) / (log_base - 1) return quantiles
[docs] def sample_q_invlog(n_samples, log_base): """Sample quantiles from 0 to 1 while concentrating samples near quantile=1 Parameters ---------- n_samples : int Number of points to sample between 0 and 1 log_base : int | float Log base value. A higher value will concentrate more samples at the extreme sides of the distribution. Returns ------- quantiles : np.ndarray 1D array of log-spaced samples from 0 to 1 """ quantiles = np.logspace(0, 1, n_samples, base=log_base) quantiles = (quantiles - 1) / (log_base - 1) quantiles = np.array(sorted(1 - quantiles)) return quantiles
[docs] class QuantileDeltaMapping: """Class for quantile delta mapping based on the method from Cannon et al., 2015 Note that this is a utility class for implementing QDM and should not be requested directly as a method in the reV/rex bias correction table input Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015). """ def __init__(self, params_oh, params_mh, params_mf, dist='empirical', relative=True, sampling='linear', log_base=10): """ Parameters ---------- params_oh : np.ndarray 2D array of **observed historical** distribution parameters created from a multi-year set of data where the shape is (space, N). This can be the output of a parametric distribution fit like ``scipy.stats.weibull_min.fit()`` where N is the number of parameters for that distribution, or this can define the x-values of N points from an empirical CDF that will be linearly interpolated between. If this is an empirical CDF, this must include the 0th and 100th percentile values and have even percentile spacing between values. params_mh : np.ndarray Same requirements as params_oh. This input arg is for the **modeled historical distribution**. params_mf : np.ndarray | None Same requirements as params_oh. This input arg is for the **modeled future distribution**. If this is None, this defaults to params_mh (no future data, just corrected to modeled historical distribution) dist : str | np.ndarray Probability distribution name to use to model the data which determines how the param args are used. This can "empirical" or any continuous distribution name from ``scipy.stats``. Can also be a 1D array of dist inputs if being used from reV, but they must all be the same option. relative : bool | np.ndarray Flag to preserve relative rather than absolute changes in quantiles. relative=False (default) will multiply by the change in quantiles while relative=True will add. See Equations 4-6 from Cannon et al., 2015 for more details. Can also be a 1D array of dist inputs if being used from reV, but they must all be the same option. sampling : str | np.ndarray If dist="empirical", this is an option for how the quantiles were sampled to produce the params inputs, e.g., how to sample the y-axis of the distribution (see sampling functions in ``rex.utilities.bc_utils``). "linear" will do even spacing, "log" will concentrate samples near quantile=0, and "invlog" will concentrate samples near quantile=1. Can also be a 1D array of dist inputs if being used from reV, but they must all be the same option. log_base : int | float | np.ndarray Log base value if sampling is "log" or "invlog". A higher value will concentrate more samples at the extreme sides of the distribution. Can also be a 1D array of dist inputs if being used from reV, but they must all be the same option. """ self.params_oh = params_oh self.params_mh = params_mh self.params_mf = params_mf if params_mf is not None else params_mh self.relative = bool(self._clean_kwarg(relative)) self.dist_name = str(self._clean_kwarg(dist)).casefold() self.sampling = str(self._clean_kwarg(sampling)).casefold() self.log_base = float(self._clean_kwarg(log_base)) self.scipy_dist = None if self.dist_name != 'empirical': self.scipy_dist = getattr(scipy.stats, self.dist_name, None) if self.scipy_dist is None: msg = ('Could not get requested distribution "{}" from ' '``scipy.stats``. Please double check your spelling ' 'and select "empirical" or one of the continuous ' 'distribution options from here: ' 'https://docs.scipy.org/doc/scipy/reference/stats.html' .format(self.dist_name)) logger.error(msg) raise KeyError(msg) @staticmethod def _clean_kwarg(inp): """Clean any kwargs inputs (e.g., dist, relative) that might be provided as an array and must be collapsed into a single string or boolean value""" unique = np.unique(inp) msg = ('_QuantileDeltaMapping kwargs must have only one unique input ' 'even if being called with arrays as part of reV but found: {}' .format(unique)) assert len(unique) == 1, msg while isinstance(inp, np.ndarray): inp = inp[0] return inp def _clean_params(self, params, arr_shape): """Verify and clean 2D parameter arrays for passing into empirical distribution or scipy continuous distribution functions. Parameters ---------- params : np.ndarray Input params shape should be (space, N) where N is the number of parameters for the distribution. arr_shape : tuple Array shape should be (time, space). Returns ------- params : np.ndarray | list If a scipy continuous dist is set, this output will be params unpacked along axis=1 into a list so that the list entries represent the scipy distribution parameters (e.g., shape, scale, loc) and each list entry is of shape (space,) """ msg = f'params must be 2D array but received {type(params)}' assert isinstance(params, np.ndarray), msg if len(params.shape) == 1: params = np.expand_dims(params, 0) msg = (f'params must be 2D array of shape ({arr_shape[1]}, N) ' f'but received shape {params.shape}') assert len(params.shape) == 2, msg assert params.shape[0] == arr_shape[1], msg if self.scipy_dist is not None: params = [params[:, i] for i in range(params.shape[1])] return params def _get_quantiles(self, n_samples): """If dist='empirical', this will get the quantile values for the CDF x-values specified in the input params""" if self.sampling == 'linear': quantiles = sample_q_linear(n_samples) elif self.sampling == 'log': quantiles = sample_q_log(n_samples, self.log_base) elif self.sampling == 'invlog': quantiles = sample_q_invlog(n_samples, self.log_base) else: msg = ('sampling option must be linear, log, or invlog, but ' 'received: {}'.format(self.sampling)) logger.error(msg) raise KeyError(msg) return quantiles
[docs] def cdf(self, x, params): """Run the CDF function e.g., convert physical variable to quantile""" if self.scipy_dist is None: p = np.zeros_like(x) for idx in range(x.shape[1]): xp = params[idx, :] fp = self._get_quantiles(len(xp)) p[:, idx] = np.interp(x[:, idx], xp, fp) else: p = self.scipy_dist.cdf(x, *params) return p
[docs] def ppf(self, p, params): """Run the inverse CDF function (percent point function) e.g., convert quantile to physical variable""" if self.scipy_dist is None: x = np.zeros_like(p) for idx in range(p.shape[1]): fp = params[idx, :] xp = self._get_quantiles(len(fp)) x[:, idx] = np.interp(p[:, idx], xp, fp) else: x = self.scipy_dist.ppf(p, *params) return x
[docs] def __call__(self, arr): """Run the QDM function to bias correct an array Parameters ---------- arr : np.ndarray 2D array of values in shape (time, space) Returns ------- arr : np.ndarray Bias corrected copy of the input array with same shape. """ if len(arr.shape) == 1: arr = np.expand_dims(arr, 1) params_oh = self._clean_params(self.params_oh, arr.shape) params_mh = self._clean_params(self.params_mh, arr.shape) params_mf = self._clean_params(self.params_mf, arr.shape) # Equation references are from Section 3 of Cannon et al 2015: # Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM # Precipitation by Quantile Mapping: How Well Do Methods Preserve # Changes in Quantiles and Extremes? Journal of Climate 28, 6938–6959 # (2015). q_mf = self.cdf(arr, params_mf) # Eq.3: Tau_m_p = F_m_p(x_m_p) x_oh = self.ppf(q_mf, params_oh) # Eq.5: x^_o:m_h:p = F-1_o_h(Tau_m_p) x_mh_mf = self.ppf(q_mf, params_mh) # Eq.4 denom: F-1_m_h(Tau_m_p) if self.relative: x_mh_mf[x_mh_mf == 0] = 0.001 # arbitrary limit to prevent div 0 delta = arr / x_mh_mf # Eq.4: x_m_p / F-1_m_h(Tau_m_p) arr_bc = x_oh * delta # Eq.6: x^_m_p = x^_o:m_h:p * delta else: delta = arr - x_mh_mf # Eq.4: x_m_p - F-1_m_h(Tau_m_p) arr_bc = x_oh + delta # Eq.6: x^_m_p = x^_o:m_h:p + delta msg = ('Input shape {} does not match QDM bias corrected output ' 'shape {}!'.format(arr.shape, arr_bc.shape)) assert arr.shape == arr_bc.shape, msg return arr_bc