# -*- coding: utf-8 -*-
"""
rex bias correction utilities.
"""
import scipy
import numpy as np
import logging
logger = logging.getLogger(__name__)
[docs]
def sample_q_linear(n_samples):
"""Sample quantiles from 0 to 1 inclusive linearly with even spacing
Parameters
----------
n_samples : int
Number of points to sample between 0 and 1
Returns
-------
quantiles : np.ndarray
1D array of evenly spaced samples from 0 to 1
"""
quantiles = np.linspace(0, 1, n_samples)
return quantiles
[docs]
def sample_q_log(n_samples, log_base):
"""Sample quantiles from 0 to 1 while concentrating samples near quantile=0
Parameters
----------
n_samples : int
Number of points to sample between 0 and 1
log_base : int | float
Log base value. A higher value will concentrate more samples at the
extreme sides of the distribution.
Returns
-------
quantiles : np.ndarray
1D array of log-spaced samples from 0 to 1
"""
quantiles = np.logspace(0, 1, n_samples, base=log_base)
quantiles = (quantiles - 1) / (log_base - 1)
return quantiles
[docs]
def sample_q_invlog(n_samples, log_base):
"""Sample quantiles from 0 to 1 while concentrating samples near quantile=1
Parameters
----------
n_samples : int
Number of points to sample between 0 and 1
log_base : int | float
Log base value. A higher value will concentrate more samples at the
extreme sides of the distribution.
Returns
-------
quantiles : np.ndarray
1D array of log-spaced samples from 0 to 1
"""
quantiles = np.logspace(0, 1, n_samples, base=log_base)
quantiles = (quantiles - 1) / (log_base - 1)
quantiles = np.array(sorted(1 - quantiles))
return quantiles
[docs]
class QuantileDeltaMapping:
"""Class for quantile delta mapping based on the method from
Cannon et al., 2015
Note that this is a utility class for implementing QDM and should not be
requested directly as a method in the reV/rex bias correction table input
Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM
Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in
Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015).
"""
def __init__(self, params_oh, params_mh, params_mf, dist='empirical',
relative=True, sampling='linear', log_base=10):
"""
Parameters
----------
params_oh : np.ndarray
2D array of **observed historical** distribution parameters created
from a multi-year set of data where the shape is (space, N). This
can be the output of a parametric distribution fit like
``scipy.stats.weibull_min.fit()`` where N is the number of
parameters for that distribution, or this can define the x-values
of N points from an empirical CDF that will be linearly
interpolated between. If this is an empirical CDF, this must
include the 0th and 100th percentile values and have even
percentile spacing between values.
params_mh : np.ndarray
Same requirements as params_oh. This input arg is for the **modeled
historical distribution**.
params_mf : np.ndarray | None
Same requirements as params_oh. This input arg is for the **modeled
future distribution**. If this is None, this defaults to params_mh
(no future data, just corrected to modeled historical distribution)
dist : str | np.ndarray
Probability distribution name to use to model the data which
determines how the param args are used. This can "empirical" or any
continuous distribution name from ``scipy.stats``. Can also be a 1D
array of dist inputs if being used from reV, but they must all be
the same option.
relative : bool | np.ndarray
Flag to preserve relative rather than absolute changes in
quantiles. relative=False (default) will multiply by the change in
quantiles while relative=True will add. See Equations 4-6 from
Cannon et al., 2015 for more details. Can also be a 1D array of
dist inputs if being used from reV, but they must all be the same
option.
sampling : str | np.ndarray
If dist="empirical", this is an option for how the quantiles were
sampled to produce the params inputs, e.g., how to sample the
y-axis of the distribution (see sampling functions in
``rex.utilities.bc_utils``). "linear" will do even spacing, "log"
will concentrate samples near quantile=0, and "invlog" will
concentrate samples near quantile=1. Can also be a 1D array of dist
inputs if being used from reV, but they must all be the same
option.
log_base : int | float | np.ndarray
Log base value if sampling is "log" or "invlog". A higher value
will concentrate more samples at the extreme sides of the
distribution. Can also be a 1D array of dist inputs if being used
from reV, but they must all be the same option.
"""
self.params_oh = params_oh
self.params_mh = params_mh
self.params_mf = params_mf if params_mf is not None else params_mh
self.relative = bool(self._clean_kwarg(relative))
self.dist_name = str(self._clean_kwarg(dist)).casefold()
self.sampling = str(self._clean_kwarg(sampling)).casefold()
self.log_base = float(self._clean_kwarg(log_base))
self.scipy_dist = None
if self.dist_name != 'empirical':
self.scipy_dist = getattr(scipy.stats, self.dist_name, None)
if self.scipy_dist is None:
msg = ('Could not get requested distribution "{}" from '
'``scipy.stats``. Please double check your spelling '
'and select "empirical" or one of the continuous '
'distribution options from here: '
'https://docs.scipy.org/doc/scipy/reference/stats.html'
.format(self.dist_name))
logger.error(msg)
raise KeyError(msg)
@staticmethod
def _clean_kwarg(inp):
"""Clean any kwargs inputs (e.g., dist, relative) that might be
provided as an array and must be collapsed into a single string or
boolean value"""
unique = np.unique(inp)
msg = ('_QuantileDeltaMapping kwargs must have only one unique input '
'even if being called with arrays as part of reV but found: {}'
.format(unique))
assert len(unique) == 1, msg
while isinstance(inp, np.ndarray):
inp = inp[0]
return inp
def _clean_params(self, params, arr_shape):
"""Verify and clean 2D parameter arrays for passing into empirical
distribution or scipy continuous distribution functions.
Parameters
----------
params : np.ndarray
Input params shape should be (space, N) where N is the number of
parameters for the distribution.
arr_shape : tuple
Array shape should be (time, space).
Returns
-------
params : np.ndarray | list
If a scipy continuous dist is set, this output will be params
unpacked along axis=1 into a list so that the list entries
represent the scipy distribution parameters
(e.g., shape, scale, loc) and each list entry is of shape (space,)
"""
msg = f'params must be 2D array but received {type(params)}'
assert isinstance(params, np.ndarray), msg
if len(params.shape) == 1:
params = np.expand_dims(params, 0)
msg = (f'params must be 2D array of shape ({arr_shape[1]}, N) '
f'but received shape {params.shape}')
assert len(params.shape) == 2, msg
assert params.shape[0] == arr_shape[1], msg
if self.scipy_dist is not None:
params = [params[:, i] for i in range(params.shape[1])]
return params
def _get_quantiles(self, n_samples):
"""If dist='empirical', this will get the quantile values for the CDF
x-values specified in the input params"""
if self.sampling == 'linear':
quantiles = sample_q_linear(n_samples)
elif self.sampling == 'log':
quantiles = sample_q_log(n_samples, self.log_base)
elif self.sampling == 'invlog':
quantiles = sample_q_invlog(n_samples, self.log_base)
else:
msg = ('sampling option must be linear, log, or invlog, but '
'received: {}'.format(self.sampling))
logger.error(msg)
raise KeyError(msg)
return quantiles
[docs]
def cdf(self, x, params):
"""Run the CDF function e.g., convert physical variable to quantile"""
if self.scipy_dist is None:
p = np.zeros_like(x)
for idx in range(x.shape[1]):
xp = params[idx, :]
fp = self._get_quantiles(len(xp))
p[:, idx] = np.interp(x[:, idx], xp, fp)
else:
p = self.scipy_dist.cdf(x, *params)
return p
[docs]
def ppf(self, p, params):
"""Run the inverse CDF function (percent point function) e.g., convert
quantile to physical variable"""
if self.scipy_dist is None:
x = np.zeros_like(p)
for idx in range(p.shape[1]):
fp = params[idx, :]
xp = self._get_quantiles(len(fp))
x[:, idx] = np.interp(p[:, idx], xp, fp)
else:
x = self.scipy_dist.ppf(p, *params)
return x
[docs]
def __call__(self, arr):
"""Run the QDM function to bias correct an array
Parameters
----------
arr : np.ndarray
2D array of values in shape (time, space)
Returns
-------
arr : np.ndarray
Bias corrected copy of the input array with same shape.
"""
if len(arr.shape) == 1:
arr = np.expand_dims(arr, 1)
params_oh = self._clean_params(self.params_oh, arr.shape)
params_mh = self._clean_params(self.params_mh, arr.shape)
params_mf = self._clean_params(self.params_mf, arr.shape)
# Equation references are from Section 3 of Cannon et al 2015:
# Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM
# Precipitation by Quantile Mapping: How Well Do Methods Preserve
# Changes in Quantiles and Extremes? Journal of Climate 28, 6938–6959
# (2015).
q_mf = self.cdf(arr, params_mf) # Eq.3: Tau_m_p = F_m_p(x_m_p)
x_oh = self.ppf(q_mf, params_oh) # Eq.5: x^_o:m_h:p = F-1_o_h(Tau_m_p)
x_mh_mf = self.ppf(q_mf, params_mh) # Eq.4 denom: F-1_m_h(Tau_m_p)
if self.relative:
x_mh_mf[x_mh_mf == 0] = 0.001 # arbitrary limit to prevent div 0
delta = arr / x_mh_mf # Eq.4: x_m_p / F-1_m_h(Tau_m_p)
arr_bc = x_oh * delta # Eq.6: x^_m_p = x^_o:m_h:p * delta
else:
delta = arr - x_mh_mf # Eq.4: x_m_p - F-1_m_h(Tau_m_p)
arr_bc = x_oh + delta # Eq.6: x^_m_p = x^_o:m_h:p + delta
msg = ('Input shape {} does not match QDM bias corrected output '
'shape {}!'.format(arr.shape, arr_bc.shape))
assert arr.shape == arr_bc.shape, msg
return arr_bc